blob: b9a83cc29edfb32e34ccd5c01910214137ae83ce [file] [log] [blame]
From 04bdb373fe636817d588c0a5ec87c2b9f154abbd Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Tue, 17 Nov 2020 18:20:01 -0500
Subject: [PATCH] FROMLIST: sched: Add a coresched command line option
Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow those devices down.
However, some users may want core scheduling even if the hardware is
secure. To support them, add a coresched= option which defaults to
'secure' and can be overridden to 'on' if the user wants to enable
coresched even if the HW is not vulnerable. 'off' would disable
core scheduling in any case.
Also add a sched_debug entry to indicate if core scheduling is turned on
or not.
(am from https://lore.kernel.org/patchwork/patch/1340786/)
BUG=b:147685035
TEST=build and boot for AMD device with 5.10 (grunt-kernelnext)
Reviewed-by: Alexander Graf <graf@amazon.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Change-Id: I392930ed0416656d009661f89f797082bf8ea5e9
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/3067969
[rebase66(tzungbi):
Revised commit message.
Squashed:
FIXUP: FROMLIST: sched: Add a coresched command line option
(https://crrev.com/c/4166884)
]
Signed-off-by: Tzung-Bi Shih <tzungbi@chromium.org>
---
.../admin-guide/kernel-parameters.txt | 14 ++++++
arch/x86/kernel/cpu/bugs.c | 14 ++++++
include/linux/cpu.h | 1 +
include/linux/sched/smt.h | 4 ++
kernel/cpu.c | 45 +++++++++++++++++++
kernel/sched/core.c | 7 ++-
kernel/sched/core_sched.c | 3 ++
kernel/sched/debug.c | 4 ++
8 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 213d0719e2b7a68e4daa12482a4c60361e19e3c4..3329eb0c5a40d3231175f60728784cbb5a7cadc0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -837,6 +837,20 @@
/proc/<pid>/coredump_filter.
See also Documentation/filesystems/proc.rst.
+ coresched= [SCHED_CORE] This feature allows the Linux scheduler
+ to force the hyperthread siblings of a core to concurrently
+ execute only tasks that belong to the same core scheduling
+ group.
+ Possible values are:
+ 'on' - Enable scheduler capability to core schedule.
+ By default, no tasks will be core scheduled, but the coresched
+ interface can be used to form groups of tasks that are forced
+ to share a core.
+ 'off' - Disable scheduler capability to core schedule.
+ 'secure' - Like 'on' but only enable on systems affected by
+ MDS or L1TF vulnerabilities. 'off' otherwise.
+ Default: 'secure'.
+
coresight_cpu_debug.enable
[ARM,ARM64]
Format: <bool>
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ab18185894dfd5e9c3f09f5fa39ac4c8ef72e7f4..917af05d01255ad71a71cbbf1edf1a0710f48e81 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -46,6 +46,7 @@ static void __init md_clear_select_mitigation(void);
static void __init taa_select_mitigation(void);
static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
+static void __init coresched_select(void);
static void __init l1d_flush_select_mitigation(void);
static void __init srso_select_mitigation(void);
static void __init gds_select_mitigation(void);
@@ -148,6 +149,9 @@ void __init cpu_select_mitigations(void)
x86_arch_cap_msr = x86_read_arch_cap_msr();
+ /* Update whether core-scheduling is needed. */
+ coresched_select();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -3022,3 +3026,13 @@ void __warn_thunk(void)
{
WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n");
}
+
+static void __init coresched_select(void)
+{
+#ifdef CONFIG_SCHED_CORE
+ if (coresched_cmd_secure() &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_L1TF))
+ static_branch_disable(&sched_coresched_supported);
+#endif
+}
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 861c3bfc5f1700db69d90168c239a935ebad76ca..71146ec2c9684a0e5c217c4162286e2dc4a66649 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -235,4 +235,5 @@ static inline bool cpu_mitigations_auto_nosmt(void)
}
#endif
+extern bool coresched_cmd_secure(void);
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index fb1e295e7e63e20e3bb4aebdd2d05507cbc84d06..79eb265c0de0db044d0a46e8ec3bf04ae61138fb 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -17,4 +17,8 @@ static inline bool sched_smt_active(void) { return false; }
void arch_smt_update(void);
+#ifdef CONFIG_SCHED_CORE
+extern struct static_key_true sched_coresched_supported;
+#endif
+
#endif /* _LINUX_SCHED_SMT_H */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 63447eb85dab6bd0fa2ab23c25bffdf788a2b9c1..ec6494939e292e388c2096078c3b5927a8f30d26 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3245,3 +3245,48 @@ static int __init mitigations_parse_cmdline(char *arg)
}
#endif
early_param("mitigations", mitigations_parse_cmdline);
+
+#ifdef CONFIG_SCHED_CORE
+/*
+ * These are used for a global "coresched=" cmdline option for controlling
+ * core scheduling. Note that core sched may be needed for use cases other
+ * than security as well.
+ */
+enum coresched_cmds {
+ CORE_SCHED_OFF,
+ CORE_SCHED_SECURE,
+ CORE_SCHED_ON,
+};
+
+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ coresched_cmd = CORE_SCHED_OFF;
+ else if (!strcmp(arg, "on"))
+ coresched_cmd = CORE_SCHED_ON;
+ else if (!strcmp(arg, "secure"))
+ /*
+ * On x86, coresched=secure means coresched is enabled only if
+ * system has MDS/L1TF vulnerability (see x86/bugs.c).
+ */
+ coresched_cmd = CORE_SCHED_SECURE;
+ else
+ pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+ arg);
+
+ if (coresched_cmd == CORE_SCHED_OFF)
+ static_branch_disable(&sched_coresched_supported);
+
+ return 0;
+}
+early_param("coresched", coresched_parse_cmdline);
+
+/* coresched=secure */
+bool coresched_cmd_secure(void)
+{
+ return coresched_cmd == CORE_SCHED_SECURE;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_secure);
+#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ac66f1d87d1bdd580913f305a51c6651410bb30c..ceef924b5683f0400a76f564e70887924cc062ef 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -415,11 +415,14 @@ static void __sched_core_disable(void)
static_branch_disable(&__sched_core_enabled);
}
+DEFINE_STATIC_KEY_TRUE(sched_coresched_supported);
+
void sched_core_get(void)
{
if (atomic_inc_not_zero(&sched_core_count))
return;
-
+ if (!static_branch_likely(&sched_coresched_supported))
+ return;
mutex_lock(&sched_core_mutex);
if (!atomic_read(&sched_core_count))
__sched_core_enable();
@@ -431,6 +434,8 @@ void sched_core_get(void)
static void __sched_core_put(struct work_struct *work)
{
+ if (!static_branch_likely(&sched_coresched_supported))
+ return;
if (atomic_dec_and_mutex_lock(&sched_core_count, &sched_core_mutex)) {
__sched_core_disable();
mutex_unlock(&sched_core_mutex);
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index a57fd8f27498f60eefc9b617a57712e1eaae9c4c..6cd1946bdbe0ef4d1935d95107f69489d2e273bb 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -145,6 +145,9 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
(cmd != PR_SCHED_CORE_GET && uaddr))
return -EINVAL;
+ if (!static_branch_likely(&sched_coresched_supported))
+ return 0;
+
rcu_read_lock();
if (pid == 0) {
task = current;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 56cbfa24c55556970904236cc8118a1919b2eb26..ad4f36a3d05bdfbaedb54cc6c37421d81222a749 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -873,6 +873,10 @@ static void sched_debug_header(struct seq_file *m)
"sysctl_sched_tunable_scaling",
sysctl_sched_tunable_scaling,
sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+#ifdef CONFIG_SCHED_CORE
+ SEQ_printf(m, " .%-40s: %d\n", "core_sched_enabled",
+ !!static_branch_likely(&__sched_core_enabled));
+#endif
SEQ_printf(m, "\n");
}
--
2.45.0.rc1.225.g2a3ae87e7f-goog