// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Handles operations using platform Time Stamp Counter (TSC).

// TODO(b/213149158): Remove after uses are added.
#![allow(dead_code)]

use std::arch::x86_64::_rdtsc;

use anyhow::anyhow;
use anyhow::Result;
use base::debug;
use base::error;
use once_cell::sync::Lazy;

mod calibrate;
mod cpuid;
mod grouping;

pub use calibrate::*;
pub use cpuid::*;
fn rdtsc_safe() -> u64 {
// SAFETY:
// Safe because _rdtsc takes no arguments
unsafe { _rdtsc() }
}

// Singleton for getting the state of the host TSCs, to avoid calibrating multiple times.
static TSC_STATE: Lazy<Option<TscState>> = Lazy::new(|| match calibrate_tsc_state() {
Ok(tsc_state) => {
debug!("Using calibrated tsc frequency: {} Hz", tsc_state.frequency);
        for (core, offset) in tsc_state.offsets.iter() {
            debug!("Core {} has tsc offset of {:?} ns", core, offset);
}
Some(tsc_state)
}
Err(e) => {
error!("Failed to calibrate tsc state: {:#}", e);
None
}
});

/// Returns the frequency of the host TSC. Calibration only happens once.
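///
/// # Example
///
/// A minimal sketch of reading the calibrated frequency; marked `ignore` because
/// calibration requires a real x86_64 host.
///
/// ```ignore
/// let freq_hz = tsc_frequency().expect("TSC calibration failed");
/// println!("host TSC frequency: {} Hz", freq_hz);
/// ```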
pub fn tsc_frequency() -> Result<u64> {
let state = TSC_STATE
.as_ref()
        .ok_or_else(|| anyhow!("TSC calibration failed"))?;
Ok(state.frequency)
}

/// Returns the state of the host TSCs. Calibration only happens once.
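///
/// # Example
///
/// A minimal sketch of inspecting the calibrated state; `offsets` pairs each core
/// index with its measured TSC offset, as the tests below assume.
///
/// ```ignore
/// let state = tsc_state().expect("TSC calibration failed");
/// for (core, offset) in &state.offsets {
///     println!("core {} has TSC offset {}", core, offset);
/// }
/// ```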
pub fn tsc_state() -> Result<TscState> {
Ok(TSC_STATE
.as_ref()
        .ok_or_else(|| anyhow!("TSC calibration failed"))?
.clone())
}

/// Affinity and TSC-offset mitigations to apply to vcpu threads when the host TSCs are out of
/// sync.
#[derive(Default, Debug)]
pub struct TscSyncMitigations {
/// Vec of per-vcpu affinities to apply to each vcpu thread. If None, no affinity should be
/// applied.
pub affinities: Vec<Option<Vec<usize>>>,
/// Vec of TSC offsets to set on each vcpu. If None, no offset should be applied.
pub offsets: Vec<Option<u64>>,
}

impl TscSyncMitigations {
    fn new(num_vcpus: usize) -> Self {
        TscSyncMitigations {
            affinities: vec![None; num_vcpus],
            offsets: vec![None; num_vcpus],
        }
    }

    /// Returns the affinity to apply to the given vcpu, panicking if `cpu_id` is out of range.
    pub fn get_vcpu_affinity(&self, cpu_id: usize) -> Option<Vec<usize>> {
        self.affinities.get(cpu_id).unwrap().clone()
    }

    /// Returns the TSC offset to set on the given vcpu, panicking if `cpu_id` is out of range.
    pub fn get_vcpu_tsc_offset(&self, cpu_id: usize) -> Option<u64> {
        *self.offsets.get(cpu_id).unwrap()
    }
}

/// Given the state of the host TSCs in `tsc_state`, and the number of vcpus that are intended to
/// be run, return a set of affinities and TSC offsets to apply to those vcpus.
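///
/// # Example
///
/// A hedged sketch of how a caller might apply the result when setting up vcpu
/// threads; `pin_thread_to_cores` and `apply_tsc_offset` are hypothetical helpers
/// standing in for whatever the hypervisor backend actually exposes.
///
/// ```ignore
/// let num_vcpus = 4;
/// let state = tsc_state()?;
/// let mitigations = get_tsc_sync_mitigations(&state, num_vcpus);
/// for vcpu_id in 0..num_vcpus {
///     if let Some(affinity) = mitigations.get_vcpu_affinity(vcpu_id) {
///         pin_thread_to_cores(vcpu_id, &affinity); // hypothetical helper
///     }
///     if let Some(offset) = mitigations.get_vcpu_tsc_offset(vcpu_id) {
///         apply_tsc_offset(vcpu_id, offset); // hypothetical helper
///     }
/// }
/// ```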
pub fn get_tsc_sync_mitigations(tsc_state: &TscState, num_vcpus: usize) -> TscSyncMitigations {
tsc_sync_mitigations_inner(tsc_state, num_vcpus, rdtsc_safe)
}

fn tsc_sync_mitigations_inner(
tsc_state: &TscState,
num_vcpus: usize,
rdtsc: fn() -> u64,
) -> TscSyncMitigations {
let mut mitigations = TscSyncMitigations::new(num_vcpus);
    // If there's only one core group, then all the TSCs are in sync and no mitigations are
    // needed.
if tsc_state.core_grouping.size() == 1 {
return mitigations;
}
let largest_group = tsc_state.core_grouping.largest_group();
let num_cores = tsc_state.offsets.len();
    // If the largest core group is at least as large as the number of vcpus, just pin all vcpus
    // to that core group; there's no need to set offsets.
if largest_group.cores.len() >= num_vcpus {
let affinity: Vec<usize> = largest_group.cores.iter().map(|core| core.core).collect();
for i in 0..num_vcpus {
mitigations.affinities[i] = Some(affinity.clone());
}
} else {
        // Otherwise, pin each vcpu to a core and set its offset to compensate.
let host_tsc_now = rdtsc();
for i in 0..num_vcpus {
// This handles the case where num_vcpus > num_cores, even though we try to avoid that
// in practice.
let pinned_core = i % num_cores;
mitigations.affinities[i] = Some(vec![pinned_core]);
            // The guest TSC value is calculated like so:
            //   host_tsc + tsc_offset = guest_tsc
            // If we assume that each host core has its own error (core_offset), then it's more
            // like this:
            //   host_tsc + core_offset + tsc_offset = guest_tsc
            // We want guest_tsc to be 0 at boot, so the formula becomes:
            //   host_tsc + core_offset + tsc_offset = 0
            // Subtracting host_tsc and core_offset from both sides gives:
            //   tsc_offset = 0 - host_tsc - core_offset
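            // Worked example with illustrative numbers: if host_tsc were 5000 and this
            // core's offset were -1000, then tsc_offset = 0 - 5000 - (-1000) = -4000,
            // which in wrapping u64 arithmetic is u64::MAX - 3999.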
            mitigations.offsets[i] = Some(
                0u64.wrapping_sub(host_tsc_now)
                    // Note: wrapping_add of the negated i64 offset cast to a u64 should be
                    // the same as using the future wrapping_add_signed function, which is
                    // only in nightly. This should be switched to using wrapping_add_signed
                    // once that is in stable.
                    .wrapping_add(tsc_state.offsets[pinned_core].1.wrapping_neg() as u64),
            );
}
}
mitigations
}

#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
use crate::tsc::grouping::CoreGroup;
use crate::tsc::grouping::CoreGrouping;
    use crate::tsc::grouping::CoreOffset;

    #[test]
fn test_sync_mitigation_set_offsets() {
let offsets = vec![(0, 0), (1, 1000), (2, -1000), (3, 2000)];
// frequency of 1GHz means 20 nanos is 20 ticks
let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
.expect("TscState::new should not fail for this test");
assert_eq!(
state.core_grouping,
CoreGrouping::new(vec![
CoreGroup {
cores: vec![CoreOffset {
core: 2,
offset: -1000
}]
},
CoreGroup {
cores: vec![CoreOffset { core: 0, offset: 0 }]
},
CoreGroup {
cores: vec![CoreOffset {
core: 1,
offset: 1000
}]
},
CoreGroup {
cores: vec![CoreOffset {
core: 3,
offset: 2000
}]
},
])
.expect("CoreGrouping::new should not fail here")
        );

        fn fake_rdtsc() -> u64 {
u64::MAX
        }

        let mitigations = tsc_sync_mitigations_inner(&state, 4, fake_rdtsc);
        // Expected TSC offsets, where host_tsc is u64::MAX (from fake_rdtsc):
        // - core 0: core offset 0, so TSC offset = 0 - u64::MAX - 0 = 1
        // - core 1: core offset 1000, so TSC offset = 0 - u64::MAX - 1000 = -999
        // - core 2: core offset -1000, so TSC offset = 0 - u64::MAX + 1000 = 1001
        // - core 3: core offset 2000, so TSC offset = 0 - u64::MAX - 2000 = -1999
let expected = [1, 1u64.wrapping_sub(1000), 1001u64, 1u64.wrapping_sub(2000)];
for (i, expect) in expected.iter().enumerate() {
assert_eq!(
mitigations
.get_vcpu_tsc_offset(i)
.unwrap_or_else(|| panic!("core {} should have an offset of {}", i, expect)),
*expect
);
assert_eq!(
mitigations
.get_vcpu_affinity(i)
.unwrap_or_else(|| panic!("core {} should have an affinity of [{}]", i, i)),
vec![i]
);
}
    }

    #[test]
fn test_sync_mitigation_large_group() {
// 8 cores, and cores 1,3,5,7 are in-sync at offset -1000
let offsets = vec![
(0, 0),
(1, -1000),
(2, 1000),
(3, -1000),
(4, 2000),
(5, -1000),
(6, 3000),
(7, -1000),
];
// frequency of 1GHz means 20 nanos is 20 ticks
let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
.expect("TscState::new should not fail for this test");
assert_eq!(
state.core_grouping,
CoreGrouping::new(vec![
CoreGroup {
cores: vec![
CoreOffset {
core: 1,
offset: -1000
},
CoreOffset {
core: 3,
offset: -1000
},
CoreOffset {
core: 5,
offset: -1000
},
CoreOffset {
core: 7,
offset: -1000
}
]
},
CoreGroup {
cores: vec![CoreOffset { core: 0, offset: 0 }]
},
CoreGroup {
cores: vec![CoreOffset {
core: 2,
offset: 1000
}]
},
CoreGroup {
cores: vec![CoreOffset {
core: 4,
offset: 2000
}]
},
CoreGroup {
cores: vec![CoreOffset {
core: 6,
offset: 3000
}]
},
])
.expect("CoreGrouping::new should not fail here")
        );

        fn fake_rdtsc() -> u64 {
u64::MAX
        }

        let num_vcpus = 4;
let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);
let expected_affinity = vec![1, 3, 5, 7];
for i in 0..num_vcpus {
assert_eq!(
mitigations.get_vcpu_affinity(i).unwrap_or_else(|| panic!(
"core {} should have an affinity of {:?}",
i, expected_affinity
)),
expected_affinity
);
assert_eq!(mitigations.get_vcpu_tsc_offset(i), None);
}
    }

    #[test]
fn more_vcpus_than_cores() {
// 4 cores, two can be grouped but it doesn't matter because we'll have more vcpus than
// the largest group.
let offsets = vec![(0, 0), (1, 0), (2, 1000), (3, 2000)];
// frequency of 1GHz means 20 nanos is 20 ticks
let state = TscState::new(1_000_000_000, offsets, Duration::from_nanos(20))
.expect("TscState::new should not fail for this test");
assert_eq!(
state.core_grouping,
CoreGrouping::new(vec![
CoreGroup {
cores: vec![
CoreOffset { core: 0, offset: 0 },
CoreOffset { core: 1, offset: 0 }
]
},
CoreGroup {
cores: vec![CoreOffset {
core: 2,
offset: 1000
}]
},
CoreGroup {
cores: vec![CoreOffset {
core: 3,
offset: 2000
}]
},
])
.expect("CoreGrouping::new should not fail here")
        );

        fn fake_rdtsc() -> u64 {
u64::MAX
        }

        // 8 vcpus, more than we have cores
let num_vcpus = 8;
let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);
let expected_offsets = [1, 1, 1u64.wrapping_sub(1000), 1u64.wrapping_sub(2000)];
for i in 0..num_vcpus {
assert_eq!(
mitigations.get_vcpu_affinity(i).unwrap_or_else(|| panic!(
"core {} should have an affinity of {:?}",
i,
i % 4
)),
// expected affinity is the vcpu modulo 4
vec![i % 4]
);
assert_eq!(
mitigations.get_vcpu_tsc_offset(i).unwrap_or_else(|| panic!(
"core {} should have an offset of {:?}",
i,
expected_offsets[i % 4]
)),
expected_offsets[i % 4]
);
}
}
}