From 926aef935fcdb07828b4ff07477de7147a4bc728 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Wed, 16 Jun 2021 17:53:29 -0400
Subject: [PATCH] FROMLIST: schedutil: Fix iowait boost issues for slow I/O
devices

The iowait boost code is currently broken. Following are the issues and
possible solutions:

Issue #1: If a CPU requests iowait boost in a cluster, another CPU can
go ahead and decay it very quickly if it sees no new request from the
iowait-boosting CPU in the meantime. To fix this, track when the iowait
boost was last applied to a policy; this happens when
should_update_freq() returns true. I have made the code wait for at
least 10 ticks between two successive iowait_boost_apply() calls before
any decay happens, and made it configurable via sysctl.
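
To illustrate the Issue #1 fix, here is a minimal standalone model of the
"hold off decay" check (not the kernel code itself; TICK_NSEC, the 10-tick
default, and the policy struct are simplified stand-ins for the sugov
structures, assuming a 1000 Hz tick):

    /* Simplified model of the decay hold-off added by this patch. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TICK_NSEC 1000000ULL                  /* assume a 1000 Hz tick */

    static unsigned int iowait_apply_ticks = 10;  /* mirrors sysctl_iowait_apply_ticks */

    struct policy { uint64_t last_update; };      /* when boost was last applied policy-wide */

    /* Decay is allowed only if no CPU in the policy applied the boost recently. */
    static bool decay_allowed(const struct policy *p, uint64_t now)
    {
        if (!iowait_apply_ticks)
            return true;
        return now - p->last_update > (uint64_t)iowait_apply_ticks * TICK_NSEC;
    }

    int main(void)
    {
        struct policy p = { .last_update = 0 };

        /* 5 ticks after the last application: decay is held off (prints 0). */
        printf("decay after 5 ticks:  %d\n", decay_allowed(&p, 5 * TICK_NSEC));
        /* 11 ticks later the window has passed and decay may proceed (prints 1). */
        printf("decay after 11 ticks: %d\n", decay_allowed(&p, 11 * TICK_NSEC));
        return 0;
    }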

Issue #2: If the iowait lasts longer than a tick, successive iowait
boost doubling does not happen, so tasks waiting on slow I/O devices
never get a boost. This is much worse when the tick rate is high, since
we use ticks to measure whether a new I/O completion happened. To work
around this, be more liberal about how many ticks must elapse before
resetting the boost. I have chosen a conservative default of 20 ticks
and made it configurable via sysctl.
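
A similarly simplified sketch of the widened reset window from Issue #2
(again a standalone model, with TICK_NSEC and the 20-tick default as assumed
values rather than the actual sugov_iowait_reset() code):

    /* Simplified model of the widened iowait-boost reset window. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TICK_NSEC 1000000ULL                  /* assume a 1000 Hz tick */

    static unsigned int iowait_reset_ticks = 20;  /* mirrors sysctl_iowait_reset_ticks */

    /* Reset the boost only once enough ticks have passed since the last iowait wakeup. */
    static bool should_reset(uint64_t now, uint64_t last_update)
    {
        uint64_t window = TICK_NSEC;

        if (iowait_reset_ticks)
            window = (uint64_t)iowait_reset_ticks * TICK_NSEC;

        return now - last_update > window;
    }

    int main(void)
    {
        /* An I/O that takes 5 ticks no longer loses its boost (prints 0)... */
        printf("reset after 5 ticks:  %d\n", should_reset(5 * TICK_NSEC, 0));
        /* ...but after 25 idle ticks the boost is dropped as before (prints 1). */
        printf("reset after 25 ticks: %d\n", should_reset(25 * TICK_NSEC, 0));
        return 0;
    }
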
(am from https://lore.kernel.org/patchwork/patch/1448374/)
TEST=dd if=zeros of=/dev/null bs=1M count=64 iflag=direct
(Throughput improvement from 180MB/s to 200MB/s).
BUG=b:157448032
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Change-Id: Ibe21e1d5347bfe9ba67084a1eb83f517e7724425
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2971351
Reviewed-by: Sean Paul <seanpaul@chromium.org>
Reviewed-by: Joel Fernandes <joelaf@google.com>
Commit-Queue: Joel Fernandes <joelaf@google.com>
Tested-by: Joel Fernandes <joelaf@google.com>
[rebase61(tzungbi):
Squashed:
FIXUP: FROMLIST: schedutil: Fix iowait boost issues for slow I/O devices
]
Signed-off-by: Tzung-Bi Shih <tzungbi@chromium.org>
---
include/linux/sched/sysctl.h | 3 +++
kernel/sched/core.c | 3 +++
kernel/sched/cpufreq_schedutil.c | 22 +++++++++++++++++++---
kernel/sched/sched.h | 3 +++
kernel/sysctl.c | 14 ++++++++++++++
5 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 5a64582b086b2864c454642df8257ecde39d394a..7875b952e07c239afd534748a710fab1f77135e2 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -19,6 +19,9 @@ enum sched_tunable_scaling {
SCHED_TUNABLESCALING_END,
};
+extern unsigned int sysctl_iowait_reset_ticks;
+extern unsigned int sysctl_iowait_apply_ticks;
+
#define NUMA_BALANCING_DISABLED 0x0
#define NUMA_BALANCING_NORMAL 0x1
#define NUMA_BALANCING_MEMORY_TIERING 0x2
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2299a5cfbfb9ed250695edb75d977edbe61eaf43..e1312093c0632f81b4c14a67a06082f7e952a9fb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -149,6 +149,9 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
*/
const_debug unsigned int sysctl_sched_nr_migrate = SCHED_NR_MIGRATE_BREAK;
+unsigned int sysctl_iowait_reset_ticks = 20;
+unsigned int sysctl_iowait_apply_ticks = 10;
+
__read_mostly int scheduler_running;
#ifdef CONFIG_SCHED_CORE
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 4492608b7d7f1c715f46ae462fd760e074066ced..0d85477a4d255593c0a126fb6d4fcbf605e95094 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -20,6 +20,7 @@ struct sugov_policy {
struct list_head tunables_hook;
raw_spinlock_t update_lock;
+ u64 last_update;
u64 last_freq_update_time;
s64 freq_update_delay_ns;
unsigned int next_freq;
@@ -178,9 +179,13 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
bool set_iowait_boost)
{
s64 delta_ns = time - sg_cpu->last_update;
+ unsigned int ticks = TICK_NSEC;
+
+ if (sysctl_iowait_reset_ticks)
+ ticks = sysctl_iowait_reset_ticks * TICK_NSEC;
- /* Reset boost only if a tick has elapsed since last request */
- if (delta_ns <= TICK_NSEC)
+ /* Reset boost only if enough ticks have elapsed since last request. */
+ if (delta_ns <= ticks)
return false;
sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
@@ -254,6 +259,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
unsigned long max_cap)
{
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long boost;
/* No boost currently required */
@@ -264,7 +270,9 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
if (sugov_iowait_reset(sg_cpu, time, false))
return;
- if (!sg_cpu->iowait_boost_pending) {
+ if (!sg_cpu->iowait_boost_pending &&
+ (!sysctl_iowait_apply_ticks ||
+ (time - sg_policy->last_update > (sysctl_iowait_apply_ticks * TICK_NSEC)))) {
/*
* No boost pending; reduce the boost value.
*/
@@ -453,6 +461,14 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
if (!sugov_update_next_freq(sg_policy, time, next_f))
goto unlock;
+ /*
+ * Required for ensuring iowait decay does not happen too
+ * quickly. This can happen, for example, if a neighboring CPU
+ * does a cpufreq update immediately after a CPU that just
+ * completed I/O.
+ */
+ sg_policy->last_update = time;
+
if (sg_policy->policy->fast_switch_enabled)
cpufreq_driver_fast_switch(sg_policy->policy, next_f);
else
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 04846272409cc00f20f24d8cc6456554d67aba0a..4d516a75f81c130b7c0ac8b18ef638cfaffd815f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2526,6 +2526,9 @@ extern const_debug unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_base_slice;
+extern unsigned int sysctl_iowait_reset_ticks;
+extern unsigned int sysctl_iowait_apply_ticks;
+
#ifdef CONFIG_SCHED_DEBUG
extern int sysctl_resched_latency_warn_ms;
extern int sysctl_resched_latency_warn_once;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 354a2d294f526ad6688168443913385eda101fa1..196309266662923f30e6686cb0a24e3b79da19bc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1623,6 +1623,20 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "iowait_reset_ticks",
+ .data = &sysctl_iowait_reset_ticks,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "iowait_apply_ticks",
+ .data = &sysctl_iowait_apply_ticks,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
--
2.34.1