| From d21c0c4a062f1184a0e1920caade3ac55c739482 Mon Sep 17 00:00:00 2001 |
| From: Chris Hyser <chris.hyser@oracle.com> |
| Date: Wed, 24 Mar 2021 17:40:15 -0400 |
| Subject: [PATCH] FROMLIST: sched: prctl() core-scheduling interface |
| |
| This patch provides support for setting and copying core scheduling |
| 'task cookies' between threads (PID), processes (TGID), and process |
| groups (PGID). |
| |
| The value of core scheduling isn't that tasks don't share a core; |
| 'nosmt' can do that. The value lies in exploiting all the sharing |
| opportunities that exist to recover possible lost performance and that |
| requires a degree of flexibility in the API. |
| |
| From a security perspective (and there are others), the thread, |
| process and process group distinction is an existing hierarchical |
| categorization of tasks that reflects many of the security concerns |
| about 'data sharing'. For example, protecting against cache-snooping |
| by a thread that can just read the memory directly isn't all that |
| useful. |
| |
| With this in mind, subcommands to CREATE/SHARE (TO/FROM) provide a |
| mechanism to create and share cookies. CREATE/SHARE_TO specify a |
| target pid with enum pidtype used to specify the scope of the targeted |
| tasks. For example, PIDTYPE_TGID will share the cookie with the |
| process and all of its threads as typically desired in a security |
| scenario. |
| |
| API: |
| |
| prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, tgtpid, pidtype, &cookie) |
| prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, tgtpid, pidtype, NULL) |
| prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, tgtpid, pidtype, NULL) |
| prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, srcpid, pidtype, NULL) |
| |
| where 'tgtpid/srcpid == 0' implies the current process and pidtype is |
| kernel enum pid_type {PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, ...}. |
| |
| For return values, EINVAL, ENOMEM are what they say. ESRCH means the |
| tgtpid/srcpid was not found. EPERM indicates lack of PTRACE permission |
| access to tgtpid/srcpid. ENODEV indicates your machine lacks SMT. |
| |
| BUG=b:152605392 |
| TEST=run power_VideoCall test |
| |
| (am from |
| https://lore.kernel.org/lkml/20210422123309.039845339@infradead.org/) |
| |
| [peterz: complete rewrite] |
| Signed-off-by: Chris Hyser <chris.hyser@oracle.com> |
| Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> |
| Tested-by: Don Hiatt <dhiatt@digitalocean.com> |
| Signed-off-by: Joel Fernandes <joelaf@google.com> |
| Change-Id: I46d6b321fb03610e00c0c90cefafd622d608951c |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2880793 |
| Reviewed-by: Sonny Rao <sonnyrao@chromium.org> |
| --- |
| include/linux/sched.h | 2 + |
| include/uapi/linux/prctl.h | 8 +++ |
| kernel/sched/core_sched.c | 114 +++++++++++++++++++++++++++++++ |
| kernel/sys.c | 5 ++ |
| tools/include/uapi/linux/prctl.h | 8 +++ |
| 5 files changed, 137 insertions(+) |
| |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index b7fb8e1633b0..477d79cf9547 100644 |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -2191,6 +2191,8 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd); |
| #ifdef CONFIG_SCHED_CORE |
| extern void sched_core_free(struct task_struct *tsk); |
| extern void sched_core_fork(struct task_struct *p); |
| +extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, |
| + unsigned long uaddr); |
| #else |
| static inline void sched_core_free(struct task_struct *tsk) { } |
| static inline void sched_core_fork(struct task_struct *p) { } |
| diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h |
| index 40672bfe8625..b9f9b523d514 100644 |
| --- a/include/uapi/linux/prctl.h |
| +++ b/include/uapi/linux/prctl.h |
| @@ -262,4 +262,12 @@ struct prctl_mm_map { |
| #define PR_PAC_SET_ENABLED_KEYS 60 |
| #define PR_PAC_GET_ENABLED_KEYS 61 |
| |
| +/* Request the scheduler to share a core */ |
| +#define PR_SCHED_CORE 62 |
| +# define PR_SCHED_CORE_GET 0 /* read back an id for the core_sched cookie of pid */ |
| +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ |
| +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ |
| +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid */ |
| +# define PR_SCHED_CORE_MAX 4 /* one past the last valid command */ |
| + |
| #endif /* _LINUX_PRCTL_H */ |
| diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c |
| index dcbbeaefaaa3..9a80e9a474c0 100644 |
| --- a/kernel/sched/core_sched.c |
| +++ b/kernel/sched/core_sched.c |
| @@ -1,5 +1,6 @@ |
| // SPDX-License-Identifier: GPL-2.0-only |
| |
| +#include <linux/prctl.h> |
| #include "sched.h" |
| |
| /* |
| @@ -113,3 +114,116 @@ void sched_core_free(struct task_struct *p) |
| { |
| sched_core_put_cookie(p->core_cookie); |
| } |
| + |
| +static void __sched_core_set(struct task_struct *p, unsigned long cookie) |
| +{ |
| + cookie = sched_core_get_cookie(cookie); /* NOTE(review): presumably takes a reference that @p will own -- confirm against the helper */ |
| + cookie = sched_core_update_cookie(p, cookie); /* @cookie is overwritten with the value the update returns */ |
| + sched_core_put_cookie(cookie); /* release the returned value; NOTE(review): presumably the cookie @p held before -- confirm */ |
| +} |
| + |
| +/* prctl(PR_SCHED_CORE) backend: @cmd is a PR_SCHED_CORE_* command, @pid == 0 selects current, @uaddr is only used by GET; returns 0 or a negative errno */ |
| +int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, |
| + unsigned long uaddr) |
| +{ |
| + unsigned long cookie = 0, id = 0; /* both may still be 0 when the out: label is reached */ |
| + struct task_struct *task, *p; |
| + struct pid *grp; |
| + int err = 0; |
| + |
| + if (!static_branch_likely(&sched_smt_present)) |
| + return -ENODEV; /* no SMT present */ |
| + |
| + if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 || |
| + (cmd != PR_SCHED_CORE_GET && uaddr)) /* only GET may pass a user address */ |
| + return -EINVAL; |
| + |
| + rcu_read_lock(); |
| + if (pid == 0) { |
| + task = current; |
| + } else { |
| + task = find_task_by_vpid(pid); |
| + if (!task) { |
| + rcu_read_unlock(); |
| + return -ESRCH; |
| + } |
| + } |
| + get_task_struct(task); /* keep task usable after the RCU section; dropped at out: */ |
| + rcu_read_unlock(); |
| + |
| + /* |
| + * Check if this process has the right to modify the specified |
| + * process. Use the regular "ptrace_may_access()" checks. |
| + */ |
| + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { |
| + err = -EPERM; |
| + goto out; |
| + } |
| + |
| + switch (cmd) { |
| + case PR_SCHED_CORE_GET: |
| + if (type != PIDTYPE_PID || uaddr & 7) { /* GET is per-task and needs an 8-byte aligned address */ |
| + err = -EINVAL; |
| + goto out; |
| + } |
| + cookie = sched_core_clone_cookie(task); |
| + if (cookie) { |
| + /* XXX improve ? -- user space receives a hash of the cookie, not the raw value */ |
| + ptr_to_hashval((void *)cookie, &id); |
| + } |
| + err = put_user(id, (u64 __user *)uaddr); /* id is 0 when task has no cookie */ |
| + goto out; |
| + |
| + case PR_SCHED_CORE_CREATE: |
| + cookie = sched_core_alloc_cookie(); |
| + if (!cookie) { |
| + err = -ENOMEM; |
| + goto out; |
| + } |
| + break; |
| + |
| + case PR_SCHED_CORE_SHARE_TO: |
| + cookie = sched_core_clone_cookie(current); /* push: the cookie of current goes to the target(s) below */ |
| + break; |
| + |
| + case PR_SCHED_CORE_SHARE_FROM: |
| + if (type != PIDTYPE_PID) { |
| + err = -EINVAL; |
| + goto out; |
| + } |
| + cookie = sched_core_clone_cookie(task); |
| + __sched_core_set(current, cookie); /* pull: current takes the cookie of task */ |
| + goto out; |
| + |
| + default: |
| + err = -EINVAL; |
| + goto out; |
| + } |
| + |
| + if (type == PIDTYPE_PID) { /* single task: no tasklist walk needed */ |
| + __sched_core_set(task, cookie); |
| + goto out; |
| + } |
| + |
| + read_lock(&tasklist_lock); |
| + grp = task_pid_type(task, type); |
| + |
| + do_each_pid_thread(grp, type, p) { /* pass 1: refuse the whole request if any member fails the access check */ |
| + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) { |
| + err = -EPERM; |
| + goto out_tasklist; |
| + } |
| + } while_each_pid_thread(grp, type, p); |
| + |
| + do_each_pid_thread(grp, type, p) { /* pass 2: every member passed, apply the cookie to all of them */ |
| + __sched_core_set(p, cookie); |
| + } while_each_pid_thread(grp, type, p); |
| +out_tasklist: |
| + read_unlock(&tasklist_lock); |
| + |
| +out: |
| + sched_core_put_cookie(cookie); /* release the value obtained in the switch (0 if none was obtained) */ |
| + put_task_struct(task); |
| + return err; |
| +} |
| + |
| diff --git a/kernel/sys.c b/kernel/sys.c |
| index 1aa66ddf7e4a..c6350232165c 100644 |
| --- a/kernel/sys.c |
| +++ b/kernel/sys.c |
| @@ -2567,6 +2567,11 @@ int ksys_prctl(int option, unsigned long arg2, unsigned long arg3, |
| error = set_syscall_user_dispatch(arg2, arg3, arg4, |
| (char __user *) arg5); |
| break; |
| +#ifdef CONFIG_SCHED_CORE |
| + case PR_SCHED_CORE: |
| + error = sched_core_share_pid(arg2, arg3, arg4, arg5); |
| + break; |
| +#endif |
| default: |
| error = -EINVAL; |
| break; |
| diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h |
| index 18a9f59dc067..967d9c55323d 100644 |
| --- a/tools/include/uapi/linux/prctl.h |
| +++ b/tools/include/uapi/linux/prctl.h |
| @@ -259,4 +259,12 @@ struct prctl_mm_map { |
| #define PR_PAC_SET_ENABLED_KEYS 60 |
| #define PR_PAC_GET_ENABLED_KEYS 61 |
| |
| +/* Request the scheduler to share a core */ |
| +#define PR_SCHED_CORE 62 |
| +# define PR_SCHED_CORE_GET 0 /* read back an id for the core_sched cookie of pid */ |
| +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ |
| +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ |
| +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid */ |
| +# define PR_SCHED_CORE_MAX 4 /* one past the last valid command */ |
| + |
| #endif /* _LINUX_PRCTL_H */ |
| -- |
| 2.32.0.272.g935e593368-goog |
| |