devices/src/tsc.rs - crosvm/crosvm - Git at Google

 // Copyright 2022 The ChromiumOS Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 //! Handles operations using platform Time Stamp Counter (TSC).

 // TODO(b/213149158): Remove after uses are added.
 #![allow(dead_code)]

 use std::arch::x86_64::_rdtsc;

 use anyhow::anyhow;
 use anyhow::Result;
 use base::debug;
 use base::error;
 use once_cell::sync::Lazy;

 mod calibrate;
 mod cpuid;
 mod grouping;

 pub use calibrate::*;
 pub use cpuid::*;

 /// Reads the host Time Stamp Counter.
 ///
 /// This is a safe, plain `fn() -> u64` wrapper around the `_rdtsc` intrinsic, which lets it be
 /// handed around as a function pointer.
 fn rdtsc_safe() -> u64 {
     // SAFETY:
     // `_rdtsc` takes no arguments, so there is no input whose validity needs to be upheld.
     unsafe {
         _rdtsc()
     }
 }

 // Singleton holding the state of the host TSCs, so calibration is attempted only once no matter
 // how many callers ask for it. Holds `None` when calibration failed; the failure is logged once
 // here at initialization.
 static TSC_STATE: Lazy<Option<TscState>> = Lazy::new(|| {
     let tsc_state = match calibrate_tsc_state() {
         Ok(tsc_state) => tsc_state,
         Err(e) => {
             error!("Failed to calibrate tsc state: {:#}", e);
             return None;
         }
     };

     debug!("Using calibrated tsc frequency: {} Hz", tsc_state.frequency);
     // Every entry is a `(core, offset)` pair. Destructure it so the log shows the recorded core
     // id and the bare offset, instead of the position in the vector and the whole tuple.
     for (core, offset) in &tsc_state.offsets {
         debug!("Core {} has tsc offset of {:?} ns", core, offset);
     }
     Some(tsc_state)
 });

 /// Returns the frequency of the host TSC. Calibration only happens once.
 ///
 /// # Errors
 ///
 /// Returns an error if the one-time TSC calibration failed.
 pub fn tsc_frequency() -> Result<u64> {
     TSC_STATE
         .as_ref()
         .map(|state| state.frequency)
         // Build the error lazily so the success path never constructs an `anyhow::Error`.
         .ok_or_else(|| anyhow!("TSC calibration failed"))
 }

 /// Returns the state of the host TSCs. Calibration only happens once.
 ///
 /// The returned value is a clone of the cached state.
 ///
 /// # Errors
 ///
 /// Returns an error if the one-time TSC calibration failed.
 pub fn tsc_state() -> Result<TscState> {
     TSC_STATE
         .as_ref()
         .cloned()
         // Build the error lazily so the success path never constructs an `anyhow::Error`.
         .ok_or_else(|| anyhow!("TSC calibration failed"))
 }

 /// Per-vcpu settings used to compensate for host TSCs that are out of sync. Both vectors are
 /// indexed by vcpu id.
 #[derive(Default, Debug)]
 pub struct TscSyncMitigations {
     /// Vec of per-vcpu affinities to apply to each vcpu thread. If None, no affinity should be
     /// applied.
     pub affinities: Vec<Option<Vec<usize>>>,
     /// Vec of TSC offsets to set on each vcpu. If None, no offset should be applied.
     pub offsets: Vec<Option<u64>>,
 }

 impl TscSyncMitigations {
     /// Creates mitigations for `num_vcpus` vcpus with no affinity and no offset set on any of them.
     fn new(num_vcpus: usize) -> Self {
         TscSyncMitigations {
             affinities: vec![None; num_vcpus],
             offsets: vec![None; num_vcpus],
         }
     }

     /// Returns the affinity to apply to vcpu `cpu_id`, or `None` if no affinity should be applied.
     ///
     /// A `cpu_id` with no entry (for example on a `Default`-constructed value, which has empty
     /// vectors) also yields `None` rather than panicking.
     pub fn get_vcpu_affinity(&self, cpu_id: usize) -> Option<Vec<usize>> {
         self.affinities.get(cpu_id).cloned().flatten()
     }

     /// Returns the TSC offset to set on vcpu `cpu_id`, or `None` if no offset should be applied.
     ///
     /// A `cpu_id` with no entry also yields `None` rather than panicking.
     pub fn get_vcpu_tsc_offset(&self, cpu_id: usize) -> Option<u64> {
         self.offsets.get(cpu_id).copied().flatten()
     }
 }

 /// Works out which affinities and TSC offsets should be applied to `num_vcpus` vcpus, given the
 /// state of the host TSCs in `tsc_state`.
 ///
 /// This is the production entry point: it hands the real TSC reader to the inner implementation.
 pub fn get_tsc_sync_mitigations(tsc_state: &TscState, num_vcpus: usize) -> TscSyncMitigations {
     let read_host_tsc: fn() -> u64 = rdtsc_safe;
     tsc_sync_mitigations_inner(tsc_state, num_vcpus, read_host_tsc)
 }

 /// Computes affinities and TSC offsets for `num_vcpus` vcpus, reading the current host TSC
 /// through `rdtsc` so that tests can substitute a fixed value.
 ///
 /// Three outcomes are possible:
 /// - all cores share one group, or no per-core offsets were recorded: nothing is set;
 /// - the largest group has at least `num_vcpus` cores: every vcpu gets that whole group as its
 ///   affinity and no offsets are set;
 /// - otherwise: vcpu `i` is pinned to the core recorded in `tsc_state.offsets[i % num_cores]`
 ///   and gets a TSC offset that cancels both the host TSC and that core's own offset.
 fn tsc_sync_mitigations_inner(
     tsc_state: &TscState,
     num_vcpus: usize,
     rdtsc: fn() -> u64,
 ) -> TscSyncMitigations {
     let mut mitigations = TscSyncMitigations::new(num_vcpus);
     let num_cores = tsc_state.offsets.len();

     // If there's only one core grouping that means all the TSCs are in sync and no mitigations are
     // needed. With no recorded cores there is nothing to pin to either, and returning here keeps
     // the modulo below from dividing by zero.
     if tsc_state.core_grouping.size() == 1 || num_cores == 0 {
         return mitigations;
     }

     let largest_group = tsc_state.core_grouping.largest_group();

     if largest_group.cores.len() >= num_vcpus {
         // The largest core group has at least as many cores as there are vcpus, so just pin all
         // vcpus to that core group; no offsets are needed.
         let affinity: Vec<usize> = largest_group.cores.iter().map(|core| core.core).collect();
         for slot in mitigations.affinities.iter_mut() {
             *slot = Some(affinity.clone());
         }
     } else {
         // Otherwise, we pin each vcpu to a core and set its offset to compensate.
         let host_tsc_now = rdtsc();

         let per_vcpu = mitigations
             .affinities
             .iter_mut()
             .zip(mitigations.offsets.iter_mut())
             .enumerate();
         for (i, (affinity, tsc_offset)) in per_vcpu {
             // The modulo handles the case where num_vcpus > num_cores, even though we try to
             // avoid that in practice. Pin to the core id stored in the entry, not to the entry's
             // position, so the affinity and the offset always describe the same core.
             let (core, core_offset) = &tsc_state.offsets[i % num_cores];

             *affinity = Some(vec![*core]);
             // The guest TSC value is calculated like so:
             //   host_tsc + tsc_offset = guest_tsc
             // If we assume that each host core has its own error (core_offset), then it's more
             // like this:
             //   host_tsc + core_offset + tsc_offset = guest_tsc
             // We want guest_tsc to be 0 at boot, so the formula is this:
             //   host_tsc + core_offset + tsc_offset = 0
             // and then you subtract host_tsc and core_offset from both sides and you get:
             //   tsc_offset = 0 - host_tsc - core_offset
             *tsc_offset = Some(
                 0u64.wrapping_sub(host_tsc_now)
                     // Note: wrapping_add with the negated offset cast from i64 to u64 is the same
                     //  as `u64::wrapping_add_signed`; switch to that once the toolchain in use
                     //  provides it.
                     // NOTE(review): the explicit `as i64` suggests the stored offset is wider
                     //  than 64 bits, in which case this truncates - confirm offsets always fit.
                     .wrapping_add(core_offset.wrapping_neg() as i64 as u64),
             );
         }
     }

     mitigations
 }

 #[cfg(test)]
 mod tests {
     use std::time::Duration;

     use super::*;
     use crate::tsc::grouping::CoreGroup;
     use crate::tsc::grouping::CoreGrouping;
     use crate::tsc::grouping::CoreOffset;

     /// Stand-in for the real TSC read. It always reports `u64::MAX`, which makes `0 - host_tsc`
     /// wrap around to exactly 1 in the expectations of the tests in this module.
     fn fake_rdtsc() -> u64 {
         u64::MAX
     }

     /// Wraps a list of core offsets into a `CoreGroup`.
     fn group(cores: Vec<CoreOffset>) -> CoreGroup {
         CoreGroup { cores }
     }

     #[test]
     fn test_sync_mitigation_set_offsets() {
         // frequency of 1GHz means 20 nanos is 20 ticks
         let state = TscState::new(
             1_000_000_000,
             vec![(0, 0), (1, 1000), (2, -1000), (3, 2000)],
             Duration::from_nanos(20),
         )
         .expect("TscState::new should not fail for this test");

         // Every core is expected to end up alone in its own group.
         let expected_grouping = CoreGrouping::new(vec![
             group(vec![CoreOffset { core: 2, offset: -1000 }]),
             group(vec![CoreOffset { core: 0, offset: 0 }]),
             group(vec![CoreOffset { core: 1, offset: 1000 }]),
             group(vec![CoreOffset { core: 3, offset: 2000 }]),
         ])
         .expect("CoreGrouping::new should not fail here");
         assert_eq!(state.core_grouping, expected_grouping);

         let mitigations = tsc_sync_mitigations_inner(&state, 4, fake_rdtsc);

         // Expected TSC offsets, with 0 - u64::MAX wrapping to 1:
         //  - core 0: core offset 0     => 1 - 0    = 1
         //  - core 1: core offset 1000  => 1 - 1000 = -999
         //  - core 2: core offset -1000 => 1 + 1000 = 1001
         //  - core 3: core offset 2000  => 1 - 2000 = -1999
         let expected = [1, 1u64.wrapping_sub(1000), 1001u64, 1u64.wrapping_sub(2000)];

         for (vcpu, want) in expected.iter().enumerate() {
             let offset = mitigations
                 .get_vcpu_tsc_offset(vcpu)
                 .unwrap_or_else(|| panic!("core {} should have an offset of {}", vcpu, want));
             assert_eq!(offset, *want);

             let affinity = mitigations
                 .get_vcpu_affinity(vcpu)
                 .unwrap_or_else(|| panic!("core {} should have an affinity of [{}]", vcpu, vcpu));
             assert_eq!(affinity, vec![vcpu]);
         }
     }

     #[test]
     fn test_sync_mitigation_large_group() {
         // 8 cores, and cores 1,3,5,7 are in-sync at offset -1000
         let per_core = vec![
             (0, 0),
             (1, -1000),
             (2, 1000),
             (3, -1000),
             (4, 2000),
             (5, -1000),
             (6, 3000),
             (7, -1000),
         ];
         // frequency of 1GHz means 20 nanos is 20 ticks
         let state = TscState::new(1_000_000_000, per_core, Duration::from_nanos(20))
             .expect("TscState::new should not fail for this test");

         let expected_grouping = CoreGrouping::new(vec![
             group(vec![
                 CoreOffset { core: 1, offset: -1000 },
                 CoreOffset { core: 3, offset: -1000 },
                 CoreOffset { core: 5, offset: -1000 },
                 CoreOffset { core: 7, offset: -1000 },
             ]),
             group(vec![CoreOffset { core: 0, offset: 0 }]),
             group(vec![CoreOffset { core: 2, offset: 1000 }]),
             group(vec![CoreOffset { core: 4, offset: 2000 }]),
             group(vec![CoreOffset { core: 6, offset: 3000 }]),
         ])
         .expect("CoreGrouping::new should not fail here");
         assert_eq!(state.core_grouping, expected_grouping);

         let num_vcpus = 4;
         let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);

         // The in-sync group has one core per vcpu, so every vcpu gets the whole group as its
         // affinity and no TSC offset.
         let expected_affinity = vec![1, 3, 5, 7];
         for vcpu in 0..num_vcpus {
             let affinity = mitigations.get_vcpu_affinity(vcpu).unwrap_or_else(|| {
                 panic!(
                     "core {} should have an affinity of {:?}",
                     vcpu, expected_affinity
                 )
             });
             assert_eq!(affinity, expected_affinity);
             assert_eq!(mitigations.get_vcpu_tsc_offset(vcpu), None);
         }
     }

     #[test]
     fn more_vcpus_than_cores() {
         // 4 cores, two can be grouped but it doesn't matter because we'll have more vcpus than
         // the largest group.
         // frequency of 1GHz means 20 nanos is 20 ticks
         let state = TscState::new(
             1_000_000_000,
             vec![(0, 0), (1, 0), (2, 1000), (3, 2000)],
             Duration::from_nanos(20),
         )
         .expect("TscState::new should not fail for this test");

         let expected_grouping = CoreGrouping::new(vec![
             group(vec![
                 CoreOffset { core: 0, offset: 0 },
                 CoreOffset { core: 1, offset: 0 },
             ]),
             group(vec![CoreOffset { core: 2, offset: 1000 }]),
             group(vec![CoreOffset { core: 3, offset: 2000 }]),
         ])
         .expect("CoreGrouping::new should not fail here");
         assert_eq!(state.core_grouping, expected_grouping);

         // 8 vcpus, more than we have cores
         let num_vcpus = 8;
         let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);
         // One expected offset per core; vcpus wrap around the four cores.
         let expected_offsets = [1, 1, 1u64.wrapping_sub(1000), 1u64.wrapping_sub(2000)];

         for vcpu in 0..num_vcpus {
             // expected affinity is the vcpu modulo 4
             let core = vcpu % 4;

             let affinity = mitigations.get_vcpu_affinity(vcpu).unwrap_or_else(|| {
                 panic!("core {} should have an affinity of {:?}", vcpu, core)
             });
             assert_eq!(affinity, vec![core]);

             let offset = mitigations.get_vcpu_tsc_offset(vcpu).unwrap_or_else(|| {
                 panic!(
                     "core {} should have an offset of {:?}",
                     vcpu, expected_offsets[core]
                 )
             });
             assert_eq!(offset, expected_offsets[core]);
         }
     }
 }
	// Copyright 2022 The ChromiumOS Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	//! Handles operations using platform Time Stamp Counter (TSC).

	// TODO(b/213149158): Remove after uses are added.
	#![allow(dead_code)]

	use std::arch::x86_64::_rdtsc;

	use anyhow::anyhow;
	use anyhow::Result;
	use base::debug;
	use base::error;
	use once_cell::sync::Lazy;

	mod calibrate;
	mod cpuid;
	mod grouping;

	pub use calibrate::*;
	pub use cpuid::*;

	/// Reads the host Time Stamp Counter.
	///
	/// This is a safe, plain `fn() -> u64` wrapper around the `_rdtsc` intrinsic, which lets it be
	/// handed around as a function pointer.
	fn rdtsc_safe() -> u64 {
	    // SAFETY:
	    // `_rdtsc` takes no arguments, so there is no input whose validity needs to be upheld.
	    unsafe {
	        _rdtsc()
	    }
	}

	// Singleton holding the state of the host TSCs, so calibration is attempted only once no matter
	// how many callers ask for it. Holds `None` when calibration failed; the failure is logged once
	// here at initialization.
	static TSC_STATE: Lazy<Option<TscState>> = Lazy::new(|| {
	    let tsc_state = match calibrate_tsc_state() {
	        Ok(tsc_state) => tsc_state,
	        Err(e) => {
	            error!("Failed to calibrate tsc state: {:#}", e);
	            return None;
	        }
	    };

	    debug!("Using calibrated tsc frequency: {} Hz", tsc_state.frequency);
	    // Every entry is a `(core, offset)` pair. Destructure it so the log shows the recorded core
	    // id and the bare offset, instead of the position in the vector and the whole tuple.
	    for (core, offset) in &tsc_state.offsets {
	        debug!("Core {} has tsc offset of {:?} ns", core, offset);
	    }
	    Some(tsc_state)
	});

	/// Returns the frequency of the host TSC. Calibration only happens once.
	///
	/// # Errors
	///
	/// Returns an error if the one-time TSC calibration failed.
	pub fn tsc_frequency() -> Result<u64> {
	    TSC_STATE
	        .as_ref()
	        .map(|state| state.frequency)
	        // Build the error lazily so the success path never constructs an `anyhow::Error`.
	        .ok_or_else(|| anyhow!("TSC calibration failed"))
	}

	/// Returns the state of the host TSCs. Calibration only happens once.
	///
	/// The returned value is a clone of the cached state.
	///
	/// # Errors
	///
	/// Returns an error if the one-time TSC calibration failed.
	pub fn tsc_state() -> Result<TscState> {
	    TSC_STATE
	        .as_ref()
	        .cloned()
	        // Build the error lazily so the success path never constructs an `anyhow::Error`.
	        .ok_or_else(|| anyhow!("TSC calibration failed"))
	}

	/// Per-vcpu settings used to compensate for host TSCs that are out of sync. Both vectors are
	/// indexed by vcpu id.
	#[derive(Default, Debug)]
	pub struct TscSyncMitigations {
	    /// Vec of per-vcpu affinities to apply to each vcpu thread. If None, no affinity should be
	    /// applied.
	    pub affinities: Vec<Option<Vec<usize>>>,
	    /// Vec of TSC offsets to set on each vcpu. If None, no offset should be applied.
	    pub offsets: Vec<Option<u64>>,
	}

	impl TscSyncMitigations {
	    /// Creates mitigations for `num_vcpus` vcpus with no affinity and no offset set on any of them.
	    fn new(num_vcpus: usize) -> Self {
	        TscSyncMitigations {
	            affinities: vec![None; num_vcpus],
	            offsets: vec![None; num_vcpus],
	        }
	    }

	    /// Returns the affinity to apply to vcpu `cpu_id`, or `None` if no affinity should be applied.
	    ///
	    /// A `cpu_id` with no entry (for example on a `Default`-constructed value, which has empty
	    /// vectors) also yields `None` rather than panicking.
	    pub fn get_vcpu_affinity(&self, cpu_id: usize) -> Option<Vec<usize>> {
	        self.affinities.get(cpu_id).cloned().flatten()
	    }

	    /// Returns the TSC offset to set on vcpu `cpu_id`, or `None` if no offset should be applied.
	    ///
	    /// A `cpu_id` with no entry also yields `None` rather than panicking.
	    pub fn get_vcpu_tsc_offset(&self, cpu_id: usize) -> Option<u64> {
	        self.offsets.get(cpu_id).copied().flatten()
	    }
	}

	/// Works out which affinities and TSC offsets should be applied to `num_vcpus` vcpus, given the
	/// state of the host TSCs in `tsc_state`.
	///
	/// This is the production entry point: it hands the real TSC reader to the inner implementation.
	pub fn get_tsc_sync_mitigations(tsc_state: &TscState, num_vcpus: usize) -> TscSyncMitigations {
	    let read_host_tsc: fn() -> u64 = rdtsc_safe;
	    tsc_sync_mitigations_inner(tsc_state, num_vcpus, read_host_tsc)
	}

	/// Computes affinities and TSC offsets for `num_vcpus` vcpus, reading the current host TSC
	/// through `rdtsc` so that tests can substitute a fixed value.
	///
	/// Three outcomes are possible:
	/// - all cores share one group, or no per-core offsets were recorded: nothing is set;
	/// - the largest group has at least `num_vcpus` cores: every vcpu gets that whole group as its
	///   affinity and no offsets are set;
	/// - otherwise: vcpu `i` is pinned to the core recorded in `tsc_state.offsets[i % num_cores]`
	///   and gets a TSC offset that cancels both the host TSC and that core's own offset.
	fn tsc_sync_mitigations_inner(
	    tsc_state: &TscState,
	    num_vcpus: usize,
	    rdtsc: fn() -> u64,
	) -> TscSyncMitigations {
	    let mut mitigations = TscSyncMitigations::new(num_vcpus);
	    let num_cores = tsc_state.offsets.len();

	    // If there's only one core grouping that means all the TSCs are in sync and no mitigations are
	    // needed. With no recorded cores there is nothing to pin to either, and returning here keeps
	    // the modulo below from dividing by zero.
	    if tsc_state.core_grouping.size() == 1 || num_cores == 0 {
	        return mitigations;
	    }

	    let largest_group = tsc_state.core_grouping.largest_group();

	    if largest_group.cores.len() >= num_vcpus {
	        // The largest core group has at least as many cores as there are vcpus, so just pin all
	        // vcpus to that core group; no offsets are needed.
	        let affinity: Vec<usize> = largest_group.cores.iter().map(|core| core.core).collect();
	        for slot in mitigations.affinities.iter_mut() {
	            *slot = Some(affinity.clone());
	        }
	    } else {
	        // Otherwise, we pin each vcpu to a core and set its offset to compensate.
	        let host_tsc_now = rdtsc();

	        let per_vcpu = mitigations
	            .affinities
	            .iter_mut()
	            .zip(mitigations.offsets.iter_mut())
	            .enumerate();
	        for (i, (affinity, tsc_offset)) in per_vcpu {
	            // The modulo handles the case where num_vcpus > num_cores, even though we try to
	            // avoid that in practice. Pin to the core id stored in the entry, not to the entry's
	            // position, so the affinity and the offset always describe the same core.
	            let (core, core_offset) = &tsc_state.offsets[i % num_cores];

	            *affinity = Some(vec![*core]);
	            // The guest TSC value is calculated like so:
	            //   host_tsc + tsc_offset = guest_tsc
	            // If we assume that each host core has its own error (core_offset), then it's more
	            // like this:
	            //   host_tsc + core_offset + tsc_offset = guest_tsc
	            // We want guest_tsc to be 0 at boot, so the formula is this:
	            //   host_tsc + core_offset + tsc_offset = 0
	            // and then you subtract host_tsc and core_offset from both sides and you get:
	            //   tsc_offset = 0 - host_tsc - core_offset
	            *tsc_offset = Some(
	                0u64.wrapping_sub(host_tsc_now)
	                    // Note: wrapping_add with the negated offset cast from i64 to u64 is the same
	                    //  as `u64::wrapping_add_signed`; switch to that once the toolchain in use
	                    //  provides it.
	                    // NOTE(review): the explicit `as i64` suggests the stored offset is wider
	                    //  than 64 bits, in which case this truncates - confirm offsets always fit.
	                    .wrapping_add(core_offset.wrapping_neg() as i64 as u64),
	            );
	        }
	    }

	    mitigations
	}

	#[cfg(test)]
	mod tests {
	    use std::time::Duration;

	    use super::*;
	    use crate::tsc::grouping::CoreGroup;
	    use crate::tsc::grouping::CoreGrouping;
	    use crate::tsc::grouping::CoreOffset;

	    /// Stand-in for the real TSC read. It always reports `u64::MAX`, which makes `0 - host_tsc`
	    /// wrap around to exactly 1 in the expectations of the tests in this module.
	    fn fake_rdtsc() -> u64 {
	        u64::MAX
	    }

	    /// Wraps a list of core offsets into a `CoreGroup`.
	    fn group(cores: Vec<CoreOffset>) -> CoreGroup {
	        CoreGroup { cores }
	    }

	    #[test]
	    fn test_sync_mitigation_set_offsets() {
	        // frequency of 1GHz means 20 nanos is 20 ticks
	        let state = TscState::new(
	            1_000_000_000,
	            vec![(0, 0), (1, 1000), (2, -1000), (3, 2000)],
	            Duration::from_nanos(20),
	        )
	        .expect("TscState::new should not fail for this test");

	        // Every core is expected to end up alone in its own group.
	        let expected_grouping = CoreGrouping::new(vec![
	            group(vec![CoreOffset { core: 2, offset: -1000 }]),
	            group(vec![CoreOffset { core: 0, offset: 0 }]),
	            group(vec![CoreOffset { core: 1, offset: 1000 }]),
	            group(vec![CoreOffset { core: 3, offset: 2000 }]),
	        ])
	        .expect("CoreGrouping::new should not fail here");
	        assert_eq!(state.core_grouping, expected_grouping);

	        let mitigations = tsc_sync_mitigations_inner(&state, 4, fake_rdtsc);

	        // Expected TSC offsets, with 0 - u64::MAX wrapping to 1:
	        //  - core 0: core offset 0     => 1 - 0    = 1
	        //  - core 1: core offset 1000  => 1 - 1000 = -999
	        //  - core 2: core offset -1000 => 1 + 1000 = 1001
	        //  - core 3: core offset 2000  => 1 - 2000 = -1999
	        let expected = [1, 1u64.wrapping_sub(1000), 1001u64, 1u64.wrapping_sub(2000)];

	        for (vcpu, want) in expected.iter().enumerate() {
	            let offset = mitigations
	                .get_vcpu_tsc_offset(vcpu)
	                .unwrap_or_else(|| panic!("core {} should have an offset of {}", vcpu, want));
	            assert_eq!(offset, *want);

	            let affinity = mitigations
	                .get_vcpu_affinity(vcpu)
	                .unwrap_or_else(|| panic!("core {} should have an affinity of [{}]", vcpu, vcpu));
	            assert_eq!(affinity, vec![vcpu]);
	        }
	    }

	    #[test]
	    fn test_sync_mitigation_large_group() {
	        // 8 cores, and cores 1,3,5,7 are in-sync at offset -1000
	        let per_core = vec![
	            (0, 0),
	            (1, -1000),
	            (2, 1000),
	            (3, -1000),
	            (4, 2000),
	            (5, -1000),
	            (6, 3000),
	            (7, -1000),
	        ];
	        // frequency of 1GHz means 20 nanos is 20 ticks
	        let state = TscState::new(1_000_000_000, per_core, Duration::from_nanos(20))
	            .expect("TscState::new should not fail for this test");

	        let expected_grouping = CoreGrouping::new(vec![
	            group(vec![
	                CoreOffset { core: 1, offset: -1000 },
	                CoreOffset { core: 3, offset: -1000 },
	                CoreOffset { core: 5, offset: -1000 },
	                CoreOffset { core: 7, offset: -1000 },
	            ]),
	            group(vec![CoreOffset { core: 0, offset: 0 }]),
	            group(vec![CoreOffset { core: 2, offset: 1000 }]),
	            group(vec![CoreOffset { core: 4, offset: 2000 }]),
	            group(vec![CoreOffset { core: 6, offset: 3000 }]),
	        ])
	        .expect("CoreGrouping::new should not fail here");
	        assert_eq!(state.core_grouping, expected_grouping);

	        let num_vcpus = 4;
	        let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);

	        // The in-sync group has one core per vcpu, so every vcpu gets the whole group as its
	        // affinity and no TSC offset.
	        let expected_affinity = vec![1, 3, 5, 7];
	        for vcpu in 0..num_vcpus {
	            let affinity = mitigations.get_vcpu_affinity(vcpu).unwrap_or_else(|| {
	                panic!(
	                    "core {} should have an affinity of {:?}",
	                    vcpu, expected_affinity
	                )
	            });
	            assert_eq!(affinity, expected_affinity);
	            assert_eq!(mitigations.get_vcpu_tsc_offset(vcpu), None);
	        }
	    }

	    #[test]
	    fn more_vcpus_than_cores() {
	        // 4 cores, two can be grouped but it doesn't matter because we'll have more vcpus than
	        // the largest group.
	        // frequency of 1GHz means 20 nanos is 20 ticks
	        let state = TscState::new(
	            1_000_000_000,
	            vec![(0, 0), (1, 0), (2, 1000), (3, 2000)],
	            Duration::from_nanos(20),
	        )
	        .expect("TscState::new should not fail for this test");

	        let expected_grouping = CoreGrouping::new(vec![
	            group(vec![
	                CoreOffset { core: 0, offset: 0 },
	                CoreOffset { core: 1, offset: 0 },
	            ]),
	            group(vec![CoreOffset { core: 2, offset: 1000 }]),
	            group(vec![CoreOffset { core: 3, offset: 2000 }]),
	        ])
	        .expect("CoreGrouping::new should not fail here");
	        assert_eq!(state.core_grouping, expected_grouping);

	        // 8 vcpus, more than we have cores
	        let num_vcpus = 8;
	        let mitigations = tsc_sync_mitigations_inner(&state, num_vcpus, fake_rdtsc);
	        // One expected offset per core; vcpus wrap around the four cores.
	        let expected_offsets = [1, 1, 1u64.wrapping_sub(1000), 1u64.wrapping_sub(2000)];

	        for vcpu in 0..num_vcpus {
	            // expected affinity is the vcpu modulo 4
	            let core = vcpu % 4;

	            let affinity = mitigations.get_vcpu_affinity(vcpu).unwrap_or_else(|| {
	                panic!("core {} should have an affinity of {:?}", vcpu, core)
	            });
	            assert_eq!(affinity, vec![core]);

	            let offset = mitigations.get_vcpu_tsc_offset(vcpu).unwrap_or_else(|| {
	                panic!(
	                    "core {} should have an offset of {:?}",
	                    vcpu, expected_offsets[core]
	                )
	            });
	            assert_eq!(offset, expected_offsets[core]);
	        }
	    }
	}