| From 012c89bc58a6fbdd05f72bd934142ce391a841bb Mon Sep 17 00:00:00 2001 |
| From: "Joel Fernandes (Google)" <joel@joelfernandes.org> |
| Date: Wed, 16 Jun 2021 17:53:29 -0400 |
| Subject: [PATCH] FROMLIST: schedutil: Fix iowait boost issues for slow I/O |
| devices |
| |
| The iowait boost code is currently broken. Following are the issues and |
| possible solutions: |
| |
| Issue #1: If a CPU requests iowait boost in a cluster, another CPU can |
| go ahead and decay it very quickly if it thinks there's no new request |
| for the iowait boosting CPU in the meanwhile. To fix this, track when |
| the iowait boost was last applied to a policy. This happens when |
| should_update_freq() returns true. I have made the code wait for at |
| least 10 ticks between 2 different iowait_boost_apply() for any decay to |
| happen, and made it configurable via sysctl. |
| |
| Issue #2: If the iowait is longer than a tick, then successive iowait |
| boost doubling does not happen. So I/O waiting tasks for slow devices |
| never get a boost. This is much worse if the tick rate is high since we |
| use ticks to measure if no new I/O completion happened. To workaround |
| this, be liberal about how many ticks should elapse before resetting the |
| boost. I have chosen a conservative number of 20, and made it |
| configurable via sysctl. |
| |
| (am from https://lore.kernel.org/patchwork/patch/1448374/) |
| |
| TEST=dd if=zeros of=/dev/null bs=1M count=64 iflag=direct |
| (Throughput improvement from 180MB/s to 200MB/s). |
| BUG=b:157448032 |
| |
| Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> |
| Change-Id: Ibe21e1d5347bfe9ba67084a1eb83f517e7724425 |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2971351 |
| Reviewed-by: Sean Paul <seanpaul@chromium.org> |
| Reviewed-by: Joel Fernandes <joelaf@google.com> |
| Commit-Queue: Joel Fernandes <joelaf@google.com> |
| Tested-by: Joel Fernandes <joelaf@google.com> |
| --- |
| include/linux/sched/sysctl.h | 3 +++ |
| kernel/sched/core.c | 3 +++ |
| kernel/sched/cpufreq_schedutil.c | 22 +++++++++++++++++++--- |
| kernel/sched/sched.h | 3 +++ |
| kernel/sysctl.c | 14 ++++++++++++++ |
| 5 files changed, 42 insertions(+), 3 deletions(-) |
| |
| diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h |
| index c1076b5e17fb..9eb02a81ad64 100644 |
| --- a/include/linux/sched/sysctl.h |
| +++ b/include/linux/sched/sysctl.h |
| @@ -23,6 +23,9 @@ enum sched_tunable_scaling { |
| SCHED_TUNABLESCALING_END, |
| }; |
| |
| +extern unsigned int sysctl_iowait_reset_ticks; |
| +extern unsigned int sysctl_iowait_apply_ticks; |
| + |
| #define NUMA_BALANCING_DISABLED 0x0 |
| #define NUMA_BALANCING_NORMAL 0x1 |
| #define NUMA_BALANCING_MEMORY_TIERING 0x2 |
| diff --git a/kernel/sched/core.c b/kernel/sched/core.c |
| index c0c07bfa8423..fa443e9edad4 100644 |
| --- a/kernel/sched/core.c |
| +++ b/kernel/sched/core.c |
| @@ -145,6 +145,9 @@ const_debug unsigned int sysctl_sched_nr_migrate = 8; |
| const_debug unsigned int sysctl_sched_nr_migrate = 32; |
| #endif |
| |
| +unsigned int sysctl_iowait_reset_ticks = 20; |
| +unsigned int sysctl_iowait_apply_ticks = 10; |
| + |
| /* |
| * period over which we measure -rt task CPU usage in us. |
| * default: 1s |
| diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c |
| index 3dbf351d12d5..444c4e8a80e0 100644 |
| --- a/kernel/sched/cpufreq_schedutil.c |
| +++ b/kernel/sched/cpufreq_schedutil.c |
| @@ -20,6 +20,7 @@ struct sugov_policy { |
| struct list_head tunables_hook; |
| |
| raw_spinlock_t update_lock; |
| + u64 last_update; |
| u64 last_freq_update_time; |
| s64 freq_update_delay_ns; |
| unsigned int next_freq; |
| @@ -180,9 +181,13 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, |
| bool set_iowait_boost) |
| { |
| s64 delta_ns = time - sg_cpu->last_update; |
| + unsigned int ticks = TICK_NSEC; |
| + |
| + if (sysctl_iowait_reset_ticks) |
| + ticks = sysctl_iowait_reset_ticks * TICK_NSEC; |
| |
| - /* Reset boost only if a tick has elapsed since last request */ |
| - if (delta_ns <= TICK_NSEC) |
| + /* Reset boost only if enough ticks has elapsed since last request. */ |
| + if (delta_ns <= ticks) |
| return false; |
| |
| sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0; |
| @@ -254,6 +259,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, |
| */ |
| static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) |
| { |
| + struct sugov_policy *sg_policy = sg_cpu->sg_policy; |
| unsigned long boost; |
| |
| /* No boost currently required */ |
| @@ -264,7 +270,9 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) |
| if (sugov_iowait_reset(sg_cpu, time, false)) |
| return; |
| |
| - if (!sg_cpu->iowait_boost_pending) { |
| + if (!sg_cpu->iowait_boost_pending && |
| + (!sysctl_iowait_apply_ticks || |
| + (time - sg_policy->last_update > (sysctl_iowait_apply_ticks * TICK_NSEC)))) { |
| /* |
| * No boost pending; reduce the boost value. |
| */ |
| @@ -450,6 +458,14 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) |
| if (!sugov_update_next_freq(sg_policy, time, next_f)) |
| goto unlock; |
| |
| + /* |
| + * Required for ensuring iowait decay does not happen too |
| + * quickly. This can happen, for example, if a neighboring CPU |
| + * does a cpufreq update immediately after a CPU that just |
| + * completed I/O. |
| + */ |
| + sg_policy->last_update = time; |
| + |
| if (sg_policy->policy->fast_switch_enabled) |
| cpufreq_driver_fast_switch(sg_policy->policy, next_f); |
| else |
| diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h |
| index 8e96e6fc77f4..876b9bf43d7f 100644 |
| --- a/kernel/sched/sched.h |
| +++ b/kernel/sched/sched.h |
| @@ -2395,6 +2395,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); |
| extern const_debug unsigned int sysctl_sched_nr_migrate; |
| extern const_debug unsigned int sysctl_sched_migration_cost; |
| |
| +extern unsigned int sysctl_iowait_reset_ticks; |
| +extern unsigned int sysctl_iowait_apply_ticks; |
| + |
| #ifdef CONFIG_SCHED_DEBUG |
| extern unsigned int sysctl_sched_latency; |
| extern unsigned int sysctl_sched_min_granularity; |
| diff --git a/kernel/sysctl.c b/kernel/sysctl.c |
| index 534442702cf9..39ba56b62ef1 100644 |
| --- a/kernel/sysctl.c |
| +++ b/kernel/sysctl.c |
| @@ -1666,6 +1666,20 @@ static struct ctl_table kern_table[] = { |
| .mode = 0644, |
| .proc_handler = proc_dointvec, |
| }, |
| + { |
| + .procname = "iowait_reset_ticks", |
| + .data = &sysctl_iowait_reset_ticks, |
| + .maxlen = sizeof(unsigned int), |
| + .mode = 0644, |
| + .proc_handler = proc_dointvec, |
| + }, |
| + { |
| + .procname = "iowait_apply_ticks", |
| + .data = &sysctl_iowait_apply_ticks, |
| + .maxlen = sizeof(unsigned int), |
| + .mode = 0644, |
| + .proc_handler = proc_dointvec, |
| + }, |
| #ifdef CONFIG_SCHEDSTATS |
| { |
| .procname = "sched_schedstats", |
| -- |
| 2.35.0 |
| |