blob: b136049b56747d9c3f226588ab342ec2ab40faad [file] [log] [blame]
From d21c0c4a062f1184a0e1920caade3ac55c739482 Mon Sep 17 00:00:00 2001
From: Chris Hyser <chris.hyser@oracle.com>
Date: Wed, 24 Mar 2021 17:40:15 -0400
Subject: [PATCH] FROMLIST: sched: prctl() core-scheduling interface
This patch provides support for setting and copying core scheduling
'task cookies' between threads (PID), processes (TGID), and process
groups (PGID).
The value of core scheduling isn't that tasks don't share a core,
'nosmt' can do that. The value lies in exploiting all the sharing
opportunities that exist to recover possible lost performance and that
requires a degree of flexibility in the API.
From a security perspective (and there are others), the thread,
process and process group distinction is an existent hierarchal
categorization of tasks that reflects many of the security concerns
about 'data sharing'. For example, protecting against cache-snooping
by a thread that can just read the memory directly isn't all that
useful.
With this in mind, subcommands to CREATE/SHARE (TO/FROM) provide a
mechanism to create and share cookies. CREATE/SHARE_TO specify a
target pid with enum pidtype used to specify the scope of the targeted
tasks. For example, PIDTYPE_TGID will share the cookie with the
process and all of it's threads as typically desired in a security
scenario.
API:
prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, tgtpid, pidtype, &cookie)
prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, tgtpid, pidtype, NULL)
prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, tgtpid, pidtype, NULL)
prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, srcpid, pidtype, NULL)
where 'tgtpid/srcpid == 0' implies the current process and pidtype is
kernel enum pid_type {PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, ...}.
For return values, EINVAL, ENOMEM are what they say. ESRCH means the
tgtpid/srcpid was not found. EPERM indicates lack of PTRACE permission
access to tgtpid/srcpid. ENODEV indicates your machines lacks SMT.
BUG=b:152605392
TEST=run power_VideoCall test
(am from
https://lore.kernel.org/lkml/20210422123309.039845339@infradead.org/)
[peterz: complete rewrite]
Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Change-Id: I46d6b321fb03610e00c0c90cefafd622d608951c
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2880793
Reviewed-by: Sonny Rao <sonnyrao@chromium.org>
---
include/linux/sched.h | 2 +
include/uapi/linux/prctl.h | 8 +++
kernel/sched/core_sched.c | 114 +++++++++++++++++++++++++++++++
kernel/sys.c | 5 ++
tools/include/uapi/linux/prctl.h | 8 +++
5 files changed, 137 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b7fb8e1633b0..477d79cf9547 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2191,6 +2191,8 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);
extern void sched_core_fork(struct task_struct *p);
+extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
+ unsigned long uaddr);
#else
static inline void sched_core_free(struct task_struct *tsk) { }
static inline void sched_core_fork(struct task_struct *p) { }
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 40672bfe8625..b9f9b523d514 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -262,4 +262,12 @@ struct prctl_mm_map {
#define PR_PAC_SET_ENABLED_KEYS 60
#define PR_PAC_GET_ENABLED_KEYS 61
+/* Request the scheduler to share a core */
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index dcbbeaefaaa3..9a80e9a474c0 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/prctl.h>
#include "sched.h"
/*
@@ -113,3 +114,116 @@ void sched_core_free(struct task_struct *p)
{
sched_core_put_cookie(p->core_cookie);
}
+
+static void __sched_core_set(struct task_struct *p, unsigned long cookie)
+{
+ cookie = sched_core_get_cookie(cookie);
+ cookie = sched_core_update_cookie(p, cookie);
+ sched_core_put_cookie(cookie);
+}
+
+/* Called from prctl interface: PR_SCHED_CORE */
+int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
+ unsigned long uaddr)
+{
+ unsigned long cookie = 0, id = 0;
+ struct task_struct *task, *p;
+ struct pid *grp;
+ int err = 0;
+
+ if (!static_branch_likely(&sched_smt_present))
+ return -ENODEV;
+
+ if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
+ (cmd != PR_SCHED_CORE_GET && uaddr))
+ return -EINVAL;
+
+ rcu_read_lock();
+ if (pid == 0) {
+ task = current;
+ } else {
+ task = find_task_by_vpid(pid);
+ if (!task) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
+ }
+ get_task_struct(task);
+ rcu_read_unlock();
+
+ /*
+ * Check if this process has the right to modify the specified
+ * process. Use the regular "ptrace_may_access()" checks.
+ */
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ switch (cmd) {
+ case PR_SCHED_CORE_GET:
+ if (type != PIDTYPE_PID || uaddr & 7) {
+ err = -EINVAL;
+ goto out;
+ }
+ cookie = sched_core_clone_cookie(task);
+ if (cookie) {
+ /* XXX improve ? */
+ ptr_to_hashval((void *)cookie, &id);
+ }
+ err = put_user(id, (u64 __user *)uaddr);
+ goto out;
+
+ case PR_SCHED_CORE_CREATE:
+ cookie = sched_core_alloc_cookie();
+ if (!cookie) {
+ err = -ENOMEM;
+ goto out;
+ }
+ break;
+
+ case PR_SCHED_CORE_SHARE_TO:
+ cookie = sched_core_clone_cookie(current);
+ break;
+
+ case PR_SCHED_CORE_SHARE_FROM:
+ if (type != PIDTYPE_PID) {
+ err = -EINVAL;
+ goto out;
+ }
+ cookie = sched_core_clone_cookie(task);
+ __sched_core_set(current, cookie);
+ goto out;
+
+ default:
+ err = -EINVAL;
+ goto out;
+ };
+
+ if (type == PIDTYPE_PID) {
+ __sched_core_set(task, cookie);
+ goto out;
+ }
+
+ read_lock(&tasklist_lock);
+ grp = task_pid_type(task, type);
+
+ do_each_pid_thread(grp, type, p) {
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
+ err = -EPERM;
+ goto out_tasklist;
+ }
+ } while_each_pid_thread(grp, type, p);
+
+ do_each_pid_thread(grp, type, p) {
+ __sched_core_set(p, cookie);
+ } while_each_pid_thread(grp, type, p);
+out_tasklist:
+ read_unlock(&tasklist_lock);
+
+out:
+ sched_core_put_cookie(cookie);
+ put_task_struct(task);
+ return err;
+}
+
diff --git a/kernel/sys.c b/kernel/sys.c
index 1aa66ddf7e4a..c6350232165c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2567,6 +2567,11 @@ int ksys_prctl(int option, unsigned long arg2, unsigned long arg3,
error = set_syscall_user_dispatch(arg2, arg3, arg4,
(char __user *) arg5);
break;
+#ifdef CONFIG_SCHED_CORE
+ case PR_SCHED_CORE:
+ error = sched_core_share_pid(arg2, arg3, arg4, arg5);
+ break;
+#endif
default:
error = -EINVAL;
break;
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 18a9f59dc067..967d9c55323d 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -259,4 +259,12 @@ struct prctl_mm_map {
#define PR_PAC_SET_ENABLED_KEYS 60
#define PR_PAC_GET_ENABLED_KEYS 61
+/* Request the scheduler to share a core */
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+
#endif /* _LINUX_PRCTL_H */
--
2.32.0.272.g935e593368-goog