| From 1f2267bcc1af48e9debfe2ddcb5088af570225e9 Mon Sep 17 00:00:00 2001 |
| From: "Vineeth Pillai (Google)" <vineeth@bitbyteword.org> |
| Date: Tue, 7 Nov 2023 21:25:45 -0500 |
| Subject: [PATCH] CHROMIUM: kvm: x86: paravirt sched framework |
| |
| Implement the basic framework needed for guest/host communication to |
| facilitate paravirt sched: |
| - A kvm MSR that the guest uses to provide the GPA of the shared memory |
| used for communicating scheduling information between host and guest. |
| |
| wrmsr(0) disables the feature. wrmsr(valid_gpa) enables the feature and |
| uses that GPA for further communication (see the sketch after the list). |
| |
| - A new cpuid feature flag that the host uses to advertise the feature |
| to the guest. |
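| |
| A guest-side sketch of the enable path (illustrative only: the guest |
| changes are not part of this patch, and the per-vcpu area and helper |
| names are assumed): |
| |
|   /* Guest side (sketch): register a per-vcpu shared area with the host. */ |
|   static DEFINE_PER_CPU_ALIGNED(struct pv_sched_data, pv_sched_shm); |
| |
|   static void pv_sched_enable(void) |
|   { |
|           u64 pa; |
| |
|           if (!kvm_para_has_feature(KVM_FEATURE_PV_SCHED)) |
|                   return; |
| |
|           /* Pass the GPA of the shared area, with the enable bit set. */ |
|           pa = slow_virt_to_phys(this_cpu_ptr(&pv_sched_shm)); |
|           wrmsrl(MSR_KVM_PV_SCHED, pa | KVM_MSR_ENABLED); |
|   } |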
| |
| When the guest kernel is about to run a critical or latency-sensitive |
| workload, it can request the hypervisor to boost the priority of the |
| vcpu thread. Similarly, the guest kernel can request an unboost when |
| the vcpu switches back to a normal workload. The guest kernel can also |
| share the priority attributes of the task it is going to schedule, and |
| the host can adjust the priority of the vcpu thread accordingly. |
| |
| When a guest determines that it needs a boost, it need not immediately |
| request a synchronous boost, as it is already running at that moment. |
| A synchronous request is detrimental because it incurs a VMEXIT. |
| Instead, the guest notes down its request in the shared memory, and the |
| host checks this request on the next VMEXIT and boosts if needed. |
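| |
| A guest-side sketch of recording such a lazy request (illustrative |
| only: it reuses the pv_sched_shm area from the sketch above, and the |
| helper name is assumed): |
| |
|   /* Guest side (sketch): note the request; no exit to the host here. */ |
|   static inline void pv_sched_note_boost(u8 boost_type) |
|   { |
|           union vcpu_sched_attr attr = { .kern_cs = boost_type }; |
| |
|           /* One 64-bit store; the host acts on it at the next VMEXIT. */ |
|           WRITE_ONCE(this_cpu_ptr(&pv_sched_shm)->attr[PV_SCHEDATTR_GUEST].pad, |
|                      attr.pad); |
|   } |
| |
|   /* |
|    * e.g. the preempt_disable() path could then do: |
|    *   pv_sched_note_boost(PVSCHED_KERNCS_BOOST_PREEMPT_DISABLED); |
|    */ |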
| |
| NMIs, IRQs, softirqs and preemption-disabled regions are considered |
| latency-sensitive critical sections in the kernel and are candidates |
| for boosting. |
| |
| UPSTREAM-TASK=b:303645537 |
| BUG=b:262267726 |
| TEST=boot |
| |
| Co-developed-by: Joel Fernandes (Google) <joel@joelfernandes.org> |
| Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> |
| Signed-off-by: Vineeth Pillai (Google) <vineeth@bitbyteword.org> |
| (cherry picked from commit e654656547e9b54e4e69fa9d8f13ecf460c9f444) |
| |
| Change-Id: I4a63df3d12bec9026fcb7c82f82b4bdf02cbb0b4 |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/5425465 |
| Reviewed-by: Joel Fernandes <joelaf@google.com> |
| Reviewed-by: Masami Hiramatsu <mhiramat@google.com> |
| Commit-Queue: Vineeth Pillai <vineethrp@google.com> |
| Tested-by: Vineeth Pillai <vineethrp@google.com> |
| --- |
| arch/x86/include/asm/kvm_host.h | 133 ++++++++++++++++++++++++ |
| arch/x86/include/uapi/asm/kvm_para.h | 2 + |
| arch/x86/kvm/Kconfig | 13 +++ |
| arch/x86/kvm/cpuid.c | 2 + |
| arch/x86/kvm/x86.c | 97 ++++++++++++++++++ |
| include/linux/kvm_host.h | 15 +++ |
| include/uapi/linux/kvm_para.h | 49 +++++++++ |
| virt/kvm/kvm_main.c | 145 +++++++++++++++++++++++++++ |
| 8 files changed, 456 insertions(+) |
| |
| diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h |
| index f849f3dfe07bf5c1e4638bbc4f611d3a98810e54..45b5aca1276b2fec45bab32312cee33dd4455981 100644 |
| --- a/arch/x86/include/asm/kvm_host.h |
| +++ b/arch/x86/include/asm/kvm_host.h |
| @@ -16,6 +16,7 @@ |
| #include <linux/irq_work.h> |
| #include <linux/irq.h> |
| #include <linux/workqueue.h> |
| +#include <linux/sched/deadline.h> |
| |
| #include <linux/kvm.h> |
| #include <linux/kvm_para.h> |
| @@ -740,6 +741,35 @@ struct kvm_queued_exception { |
| bool has_payload; |
| }; |
| |
| +/* |
| + * PARAVIRT_SCHED info |
| + */ |
| +struct vcpu_pv_sched { |
| + /* |
| + * Current scheduling attributes for this vcpu. |
| + */ |
| + union vcpu_sched_attr attr; |
| + /* |
| + * Kernel priority: [-1, 140) |
| + * Used for priority comparisons. |
| + */ |
| + int normal_prio; |
| + /* |
| + * Default scheduling attributes for this vcpu, |
| + * when the VM was started. |
| + */ |
| + union vcpu_sched_attr default_attr; |
| + int default_normal_prio; |
| + /* |
| + * Policy and priority for guest kernel critical |
| + * sections - nmi, irq, softirq and preemption disabled. |
| + */ |
| + int kern_cs_prio; |
| + int kern_cs_policy; |
| + u64 msr_val; |
| + struct gfn_to_hva_cache data; |
| +}; |
| + |
| struct kvm_vcpu_arch { |
| /* |
| * rip and regs accesses must go through |
| @@ -1033,6 +1063,10 @@ struct kvm_vcpu_arch { |
| /* Protected Guests */ |
| bool guest_state_protected; |
| |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| + struct vcpu_pv_sched pv_sched; |
| +#endif |
| + |
| /* |
| * Set when PDPTS were loaded directly by the userspace without |
| * reading the guest memory |
| @@ -2354,4 +2388,103 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); |
| */ |
| #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) |
| |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| +/* |
| + * Default policy and priority used for boosting |
| + * VCPU threads. |
| + */ |
| +#define VCPU_KERN_CS_PRIO 8 |
| +#define VCPU_KERN_CS_POLICY SCHED_RR |
| + |
| +static inline bool kvm_arch_vcpu_pv_sched_enabled(struct kvm_vcpu_arch *arch) |
| +{ |
| + return arch->pv_sched.msr_val; |
| +} |
| + |
| +static inline union vcpu_sched_attr kvm_arch_vcpu_default_sched_attr(struct kvm_vcpu_arch *arch) |
| +{ |
| + return arch->pv_sched.default_attr; |
| +} |
| + |
| +/* |
| + * copied from kernel/sched/core.c:__normal_prio() |
| + */ |
| +static inline int __sched_normal_prio(union vcpu_sched_attr attr) |
| +{ |
| + int prio; |
| + |
| + if (attr.sched_policy == SCHED_DEADLINE) |
| + prio = MAX_DL_PRIO - 1; |
| + else if (attr.sched_policy == SCHED_FIFO || attr.sched_policy == SCHED_RR) |
| + prio = MAX_RT_PRIO - 1 - attr.rt_priority; |
| + else |
| + prio = NICE_TO_PRIO(attr.sched_nice); |
| + |
| + return prio; |
| +} |
| + |
| +/* |
| + * Returns |
| + * 0 if the prio specified by attr is equal to the vcpu's current prio |
| + * 1 if the prio specified by attr is greater than the vcpu's current prio |
| + * -1 if the prio specified by attr is less than the vcpu's current prio |
| + */ |
| +static inline int kvm_arch_vcpu_normalprio_cmp(struct kvm_vcpu_arch *arch, |
| + union vcpu_sched_attr attr) |
| +{ |
| + int normal_prio = __sched_normal_prio(attr); |
| + |
| + if (normal_prio == arch->pv_sched.normal_prio) |
| + return 0; |
| + else if (normal_prio > arch->pv_sched.normal_prio) |
| + return 1; |
| + else |
| + return -1; |
| +} |
| + |
| +static inline void kvm_arch_vcpu_set_sched_attr(struct kvm_vcpu_arch *arch, |
| + union vcpu_sched_attr attr) |
| +{ |
| + arch->pv_sched.attr = attr; |
| + arch->pv_sched.normal_prio = __sched_normal_prio(attr); |
| +} |
| + |
| +static inline void kvm_arch_vcpu_set_default_sched_attr(struct kvm_vcpu_arch *arch, |
| + union vcpu_sched_attr attr) |
| +{ |
| + arch->pv_sched.default_attr = attr; |
| + arch->pv_sched.default_normal_prio = __sched_normal_prio(attr); |
| +} |
| + |
| +static inline int kvm_arch_vcpu_kerncs_prio(struct kvm_vcpu_arch *arch) |
| +{ |
| + return arch->pv_sched.kern_cs_prio; |
| +} |
| + |
| +static inline int kvm_arch_vcpu_kerncs_policy(struct kvm_vcpu_arch *arch) |
| +{ |
| + return arch->pv_sched.kern_cs_policy; |
| +} |
| + |
| +static inline int kvm_arch_vcpu_set_kerncs_prio(struct kvm_vcpu_arch *arch, int prio) |
| +{ |
| + if ((unsigned int)prio > MAX_RT_PRIO) |
| + return -EINVAL; |
| + |
| + arch->pv_sched.kern_cs_prio = prio; |
| + |
| + return 0; |
| +} |
| + |
| +static inline int kvm_arch_vcpu_set_kerncs_policy(struct kvm_vcpu_arch *arch, int policy) |
| +{ |
| + if (policy != SCHED_FIFO && policy != SCHED_RR) |
| + return -EINVAL; |
| + |
| + arch->pv_sched.kern_cs_policy = policy; |
| + |
| + return 0; |
| +} |
| +#endif |
| + |
| #endif /* _ASM_X86_KVM_HOST_H */ |
| diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h |
| index 79c12988bc87aaa68c7ef5f4d39cb6fba81b1455..2f51db8f725a5fa88591c56a5db4674416dfd938 100644 |
| --- a/arch/x86/include/uapi/asm/kvm_para.h |
| +++ b/arch/x86/include/uapi/asm/kvm_para.h |
| @@ -36,6 +36,7 @@ |
| #define KVM_FEATURE_MSI_EXT_DEST_ID 15 |
| #define KVM_FEATURE_HC_MAP_GPA_RANGE 16 |
| #define KVM_FEATURE_MIGRATION_CONTROL 17 |
| +#define KVM_FEATURE_PV_SCHED 29 |
| #define KVM_FEATURE_HOST_SUSPEND_TIME 30 |
| |
| #define KVM_HINTS_REALTIME 0 |
| @@ -60,6 +61,7 @@ |
| #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 |
| #define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 |
| #define MSR_KVM_HOST_SUSPEND_TIME 0x4b564d98 |
| +#define MSR_KVM_PV_SCHED 0x4b564da0 |
| |
| struct kvm_steal_time { |
| __u64 steal; |
| diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig |
| index 22012ad6ff9ace5b14be4bb02c5ddc13a1fa59e8..9eae15d3234112ee5cad97c44b522af50845c66f 100644 |
| --- a/arch/x86/kvm/Kconfig |
| +++ b/arch/x86/kvm/Kconfig |
| @@ -221,4 +221,17 @@ config KVM_VIRT_SUSPEND_TIMING |
| |
| If unsure, say N. |
| |
| +config PARAVIRT_SCHED_KVM |
| + bool "Enable paravirt scheduling capability for kvm" |
| + depends on KVM |
| + default n |
| + help |
| + Paravirtualized scheduling facilitates the exchange of scheduling |
| + related information between the host and guest through shared memory, |
| + enhancing the efficiency of vCPU thread scheduling by the hypervisor. |
| + An illustrative use case involves dynamically boosting the priority of |
| + a vCPU thread when the guest is executing a latency-sensitive workload |
| + on that specific vCPU. |
| + This config enables paravirt scheduling in the kvm hypervisor. |
| + |
| endif # VIRTUALIZATION |
| diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c |
| index 76b4c4e05e89c28c3f16cbb79cdfbc3cd005df00..40b52f84e11053563965526cb1cc631b665387e9 100644 |
| --- a/arch/x86/kvm/cpuid.c |
| +++ b/arch/x86/kvm/cpuid.c |
| @@ -1196,6 +1196,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) |
| (1 << KVM_FEATURE_POLL_CONTROL) | |
| (1 << KVM_FEATURE_PV_SCHED_YIELD) | |
| (1 << KVM_FEATURE_ASYNC_PF_INT); |
| + if (IS_ENABLED(CONFIG_PARAVIRT_SCHED_KVM)) |
| + entry->eax |= (1 << KVM_FEATURE_PV_SCHED); |
| |
| #ifdef CONFIG_KVM_VIRT_SUSPEND_TIMING |
| entry->eax |= (1 << KVM_FEATURE_HOST_SUSPEND_TIME); |
| diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c |
| index e78f86f258c4b3fd91972632bd2887a8f13ac16d..6dcbe53f6a59d3463b97d1fc82a91454a1608b89 100644 |
| --- a/arch/x86/kvm/x86.c |
| +++ b/arch/x86/kvm/x86.c |
| @@ -2184,6 +2184,15 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) |
| ret = EXIT_FASTPATH_REENTER_GUEST; |
| } |
| break; |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| + case MSR_KVM_PV_SCHED: |
| + data = kvm_read_edx_eax(vcpu); |
| + if (data == ULLONG_MAX) { |
| + kvm_skip_emulated_instruction(vcpu); |
| + ret = EXIT_FASTPATH_EXIT_HANDLED; |
| + } |
| + break; |
| +#endif |
| default: |
| break; |
| } |
| @@ -4035,6 +4044,37 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
| return 1; |
| break; |
| |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| + case MSR_KVM_PV_SCHED: |
| + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED)) |
| + return 1; |
| + |
| + if (!(data & KVM_MSR_ENABLED)) |
| + break; |
| + |
| + if (!(data & ~KVM_MSR_ENABLED) && vcpu->arch.pv_sched.msr_val) { |
| + /* |
| + * Disable the feature |
| + */ |
| + vcpu->arch.pv_sched.msr_val = 0; |
| + kvm_vcpu_set_sched(vcpu, |
| + kvm_arch_vcpu_default_sched_attr(&vcpu->arch)); |
| + } else if (!kvm_gfn_to_hva_cache_init(vcpu->kvm, |
| + &vcpu->arch.pv_sched.data, data & ~KVM_MSR_ENABLED, |
| + sizeof(struct pv_sched_data))) { |
| + vcpu->arch.pv_sched.msr_val = data; |
| + kvm_arch_vcpu_set_default_sched_attr(&vcpu->arch, |
| + kvm_vcpu_get_sched(vcpu)); |
| + kvm_vcpu_set_sched(vcpu, |
| + kvm_arch_vcpu_default_sched_attr(&vcpu->arch)); |
| + } else { |
| + kvm_debug_ratelimited( |
| + "kvm:%p, vcpu:%p, msr: %llx, kvm_gfn_to_hva_cache_init failed!\n", |
| + vcpu->kvm, vcpu, data & ~KVM_MSR_ENABLED); |
| + } |
| + break; |
| +#endif |
| + |
| case MSR_KVM_POLL_CONTROL: |
| if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) |
| return 1; |
| @@ -4402,6 +4442,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
| |
| msr_info->data = vcpu->arch.pv_eoi.msr_val; |
| break; |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| + case MSR_KVM_PV_SCHED: |
| + msr_info->data = vcpu->arch.pv_sched.msr_val; |
| + break; |
| +#endif |
| case MSR_KVM_POLL_CONTROL: |
| if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) |
| return 1; |
| @@ -10912,6 +10957,51 @@ static void kvm_adjust_suspend_time(struct kvm_vcpu *vcpu) |
| } |
| #endif |
| |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| +/* |
| + * Update the host area of PV_SCHED with the vcpu task sched parameters |
| + * so that guest can utilize it if needed. |
| + */ |
| +static void record_vcpu_pv_sched(struct kvm_vcpu *vcpu) |
| +{ |
| + if (!kvm_arch_vcpu_pv_sched_enabled(&vcpu->arch)) |
| + return; |
| + |
| + pagefault_disable(); |
| + kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.pv_sched.data, |
| + &vcpu->arch.pv_sched.attr, PV_SCHEDATTR_HOST_OFFSET, sizeof(union vcpu_sched_attr)); |
| + pagefault_enable(); |
| +} |
| + |
| +static inline void kvm_vcpu_do_pv_sched(struct kvm_vcpu *vcpu) |
| +{ |
| + if (!kvm_vcpu_sched_enabled(vcpu)) |
| + return; |
| + |
| + if (kvm_cpu_has_pending_timer(vcpu) || kvm_cpu_has_interrupt(vcpu)) |
| + kvm_vcpu_boost(vcpu, PVSCHED_KERNCS_BOOST_IRQ); |
| + else { |
| + union vcpu_sched_attr attr; |
| + |
| + if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.pv_sched.data, |
| + &attr, PV_SCHEDATTR_GUEST_OFFSET, sizeof(attr))) |
| + return; |
| + kvm_vcpu_set_sched(vcpu, attr); |
| + } |
| +} |
| + |
| +static void kvm_vcpu_pv_sched_init(struct kvm_vcpu *vcpu) |
| +{ |
| + kvm_arch_vcpu_set_kerncs_prio(&vcpu->arch, VCPU_KERN_CS_PRIO); |
| + kvm_arch_vcpu_set_kerncs_policy(&vcpu->arch, VCPU_KERN_CS_POLICY); |
| + kvm_arch_vcpu_set_default_sched_attr(&vcpu->arch, kvm_vcpu_get_sched(vcpu)); |
| +} |
| +#else |
| +static inline void record_vcpu_pv_sched(struct kvm_vcpu *vcpu) { } |
| +static inline void kvm_vcpu_do_pv_sched(struct kvm_vcpu *vcpu) { } |
| +static inline void kvm_vcpu_pv_sched_init(struct kvm_vcpu *vcpu) { } |
| +#endif |
| + |
| /* |
| * Called within kvm->srcu read side. |
| * Returns 1 to let vcpu_run() continue the guest execution loop without |
| @@ -11015,6 +11105,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
| kvm_pmu_handle_event(vcpu); |
| if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
| kvm_pmu_deliver_pmi(vcpu); |
| + if (kvm_check_request(KVM_REQ_VCPU_PV_SCHED, vcpu)) |
| + record_vcpu_pv_sched(vcpu); |
| #ifdef CONFIG_KVM_SMM |
| if (kvm_check_request(KVM_REQ_SMI, vcpu)) |
| process_smi(vcpu); |
| @@ -11285,6 +11377,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
| guest_timing_exit_irqoff(); |
| |
| local_irq_enable(); |
| + |
| + kvm_vcpu_do_pv_sched(vcpu); |
| + |
| preempt_enable(); |
| |
| kvm_vcpu_srcu_read_lock(vcpu); |
| @@ -12384,6 +12479,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) |
| if (r) |
| goto free_guest_fpu; |
| |
| + kvm_vcpu_pv_sched_init(vcpu); |
| + |
| vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); |
| vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; |
| kvm_xen_init_vcpu(vcpu); |
| diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h |
| index 700c886117eccc1369f0cd290779785158c0fbac..79154fa34aa12d2ab04adee7b232c78d8b48f798 100644 |
| --- a/include/linux/kvm_host.h |
| +++ b/include/linux/kvm_host.h |
| @@ -174,6 +174,7 @@ static inline bool is_error_page(struct page *page) |
| #define KVM_REQ_UNBLOCK 2 |
| #define KVM_REQ_DIRTY_RING_SOFT_FULL 3 |
| #define KVM_REQ_SUSPEND_TIME_ADJ 5 |
| +#define KVM_REQ_VCPU_PV_SCHED 6 |
| #define KVM_REQUEST_ARCH_BASE 8 |
| |
| /* |
| @@ -2611,4 +2612,18 @@ static inline u64 vcpu_suspend_time_injected(struct kvm_vcpu *vcpu) |
| } |
| #endif /* CONFIG_KVM_VIRT_SUSPEND_TIMING */ |
| |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| +int kvm_vcpu_set_sched(struct kvm_vcpu *vcpu, union vcpu_sched_attr attr); |
| +union vcpu_sched_attr kvm_vcpu_get_sched(struct kvm_vcpu *vcpu); |
| + |
| +static inline bool kvm_vcpu_sched_enabled(struct kvm_vcpu *vcpu) |
| +{ |
| + return kvm_arch_vcpu_pv_sched_enabled(&vcpu->arch); |
| +} |
| + |
| +void kvm_vcpu_boost(struct kvm_vcpu *vcpu, enum kerncs_boost_type boost_type); |
| +#else |
| +static inline void kvm_vcpu_boost(struct kvm_vcpu *vcpu, enum kerncs_boost_type boost_type) { } |
| +#endif |
| + |
| #endif |
| diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h |
| index 960c7e93d1a98a363df4c6e7eeaffee065512f7c..b7c11650ec84f8a740e1111fd460756a98a0a88e 100644 |
| --- a/include/uapi/linux/kvm_para.h |
| +++ b/include/uapi/linux/kvm_para.h |
| @@ -2,6 +2,9 @@ |
| #ifndef _UAPI__LINUX_KVM_PARA_H |
| #define _UAPI__LINUX_KVM_PARA_H |
| |
| +#include <linux/const.h> |
| +#include <linux/types.h> |
| + |
| /* |
| * This header file provides a method for making a hypercall to the host |
| * Architectures should define: |
| @@ -31,6 +34,52 @@ |
| #define KVM_HC_SCHED_YIELD 11 |
| #define KVM_HC_MAP_GPA_RANGE 12 |
| |
| +enum kerncs_boost_type { |
| + PVSCHED_KERNCS_BOOST_PREEMPT_DISABLED = 0x1, |
| + PVSCHED_KERNCS_BOOST_IRQ = 0x2, |
| + PVSCHED_KERNCS_BOOST_SOFTIRQ = 0x4, |
| + PVSCHED_KERNCS_BOOST_IDLE = 0x8, |
| + PVSCHED_KERNCS_BOOST_ALL = 0xF, |
| +}; |
| + |
| +union vcpu_sched_attr { |
| + struct { |
| + __u8 enabled; |
| + __u8 sched_policy; |
| + __s8 sched_nice; |
| + __u8 rt_priority; |
| + /* |
| + * Guest running a kernel critical section: |
| + * - nmi, irq, softirq, preemption disabled. |
| + */ |
| + __u8 kern_cs; |
| + }; |
| + __u64 pad; |
| +}; |
| + |
| +enum pv_schedattr_type { |
| + PV_SCHEDATTR_GUEST = 0, |
| + PV_SCHEDATTR_HOST, |
| + PV_SCHEDATTR_MAX |
| +}; |
| + |
| +/* |
| + * Offset of guest area in the PV_SCHED shared memory. |
| + */ |
| +#define PV_SCHEDATTR_GUEST_OFFSET 0 |
| + |
| +/* |
| + * Offset of the host area in the PV_SCHED shared memory. |
| + */ |
| +#define PV_SCHEDATTR_HOST_OFFSET (sizeof(union vcpu_sched_attr)) |
| + |
| +/* |
| + * PARAVIRT_SCHED info shared between host and guest. |
| + */ |
| +struct pv_sched_data { |
| + union vcpu_sched_attr attr[PV_SCHEDATTR_MAX]; |
| +}; |
| + |
| /* |
| * hypercalls use architecture specific |
| */ |
| diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c |
| index 758ece99863c0ef0c65555d9fc68dea790773411..5e086be7a8dc21ea3e4292a8b6133f3295fc5402 100644 |
| --- a/virt/kvm/kvm_main.c |
| +++ b/virt/kvm/kvm_main.c |
| @@ -54,6 +54,9 @@ |
| #include <asm/ioctl.h> |
| #include <linux/uaccess.h> |
| |
| +#include <linux/sched.h> |
| +#include <uapi/linux/sched/types.h> |
| + |
| #include "coalesced_mmio.h" |
| #include "async_pf.h" |
| #include "kvm_mm.h" |
| @@ -4108,6 +4111,148 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) |
| } |
| EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up); |
| |
| +#ifdef CONFIG_PARAVIRT_SCHED_KVM |
| +union vcpu_sched_attr kvm_vcpu_get_sched(struct kvm_vcpu *vcpu) |
| +{ |
| + union vcpu_sched_attr attr = { 0 }; |
| + struct pid *pid; |
| + struct task_struct *vcpu_task = NULL; |
| + |
| + rcu_read_lock(); |
| + pid = rcu_dereference(vcpu->pid); |
| + if (pid) |
| + vcpu_task = get_pid_task(pid, PIDTYPE_PID); |
| + rcu_read_unlock(); |
| + if (vcpu_task == NULL) |
| + return attr; |
| + |
| + attr.sched_policy = vcpu_task->policy; |
| + if (vcpu_task->policy == SCHED_RR || vcpu_task->policy == SCHED_FIFO) |
| + attr.rt_priority = vcpu_task->rt_priority; |
| + else |
| + attr.sched_nice = task_nice(vcpu_task); |
| + |
| + put_task_struct(vcpu_task); |
| + |
| + return attr; |
| +} |
| + |
| +/* |
| + * Check if we need to act on the boost/unboost request. Returns true |
| + * when the vcpu's current priority differs from the requested one: |
| + * - a boost is requested and the vcpu is not yet boosted, or |
| + * - an unboost is requested and the vcpu is still boosted. |
| + */ |
| +static inline bool __needs_set_sched(struct kvm_vcpu *vcpu, int policy, int prio, int nice) |
| +{ |
| + union vcpu_sched_attr attr = { |
| + .sched_policy = policy, |
| + .rt_priority = prio, |
| + .sched_nice = nice |
| + }; |
| + |
| + return kvm_arch_vcpu_normalprio_cmp(&vcpu->arch, attr); |
| +} |
| + |
| +int kvm_vcpu_set_sched(struct kvm_vcpu *vcpu, union vcpu_sched_attr attr) |
| +{ |
| + int rt_prio; |
| + int nice; |
| + int policy; |
| + int ret = 0; |
| + struct pid *pid; |
| + struct task_struct *vcpu_task = NULL; |
| + int max_rt_prio = kvm_arch_vcpu_kerncs_prio(&vcpu->arch); |
| + |
| +retry_disable: |
| + /* |
| + * If the feature is disabled, revert to CFS. |
| + */ |
| + if (!kvm_vcpu_sched_enabled(vcpu)) |
| + attr = kvm_arch_vcpu_default_sched_attr(&vcpu->arch); |
| + |
| + policy = attr.sched_policy; |
| + rt_prio = attr.rt_priority; |
| + nice = attr.sched_nice; |
| + if (attr.kern_cs || policy == SCHED_DEADLINE) { |
| + nice = 0; |
| + policy = kvm_arch_vcpu_kerncs_policy(&vcpu->arch); |
| + rt_prio = max_rt_prio; |
| + } else if (policy == SCHED_FIFO || policy == SCHED_RR) { |
| + nice = 0; |
| + if (rt_prio > max_rt_prio) |
| + rt_prio = max_rt_prio; |
| + } else { |
| + rt_prio = 0; |
| + } |
| + |
| + rcu_read_lock(); |
| + pid = rcu_dereference(vcpu->pid); |
| + if (pid) |
| + vcpu_task = get_pid_task(pid, PIDTYPE_PID); |
| + rcu_read_unlock(); |
| + if (vcpu_task == NULL) |
| + return -KVM_EINVAL; |
| + |
| + /* |
| + * This might be called from interrupt context. |
| + * Since we do not use rt-mutexes, we can safely call |
| + * sched_setattr_pi_nocheck() with pi = false. |
| + * NOTE: If, in the future, we use rt-mutexes, this should |
| + * be modified to use a tasklet to do boost/unboost. |
| + */ |
| + WARN_ON_ONCE(vcpu_task->pi_top_task); |
| + if (__needs_set_sched(vcpu, policy, rt_prio, nice)) { |
| + struct sched_attr sattr = { |
| + .sched_policy = policy, |
| + .sched_nice = nice, |
| + .sched_priority = rt_prio, |
| + }; |
| + ret = sched_setattr_pi_nocheck(vcpu_task, &sattr, false); |
| + } |
| + |
| + /* |
| + * values to return to guest. |
| + */ |
| + attr.sched_policy = vcpu_task->policy; |
| + if (task_is_realtime(vcpu_task)) |
| + attr.rt_priority = vcpu_task->rt_priority; |
| + else |
| + attr.sched_nice = task_nice(vcpu_task); |
| + |
| + put_task_struct(vcpu_task); |
| + |
| + attr.enabled = kvm_vcpu_sched_enabled(vcpu); |
| + /* |
| + * If the feature was disabled, report it via the enabled field to let the guest know. |
| + */ |
| + if (!attr.enabled) { |
| + /* |
| + * There could be a race where the disable path disabled the feature |
| + * but we did the boost without knowing that disable was in progress. |
| + * Unboost again. |
| + */ |
| + if (attr.sched_policy != SCHED_NORMAL || attr.sched_nice != 0) |
| + goto retry_disable; |
| + } |
| + |
| + kvm_arch_vcpu_set_sched_attr(&vcpu->arch, attr); |
| + kvm_make_request(KVM_REQ_VCPU_PV_SCHED, vcpu); |
| + |
| + return ret; |
| +} |
| + |
| +void kvm_vcpu_boost(struct kvm_vcpu *vcpu, enum kerncs_boost_type boost_type) |
| +{ |
| + union vcpu_sched_attr attr = { |
| + .kern_cs = boost_type |
| + }; |
| + |
| + kvm_vcpu_set_sched(vcpu, attr); |
| +} |
| +EXPORT_SYMBOL_GPL(kvm_vcpu_boost); |
| +#endif |
| + |
| #ifndef CONFIG_S390 |
| /* |
| * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. |
| -- |
| 2.46.0.rc2.264.g509ed76dc8-goog |
| |