blob: 0fcabb7eac06c8e2d28347d141ade33a68f4cea5 [file] [log] [blame]
From 7f740b2fb463b933be0caa9fe572fdc421bb844a Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <vineethrp@google.com>
Date: Fri, 29 Sep 2023 09:35:26 -0400
Subject: [PATCH] CHROMIUM: sched/fair: Raise SCHED_SOFTIRQ less aggressively
This patch is trying to address a high rate of SCHED_SOFTIRQ
specifically in lowres mode. Rate of SCHED_SOFTIRQ is controlled by
rq->next_balance and this is updated in newidle_balance and
rebalance_domains. rq->next_balance is updated as
sd->last_balance + by a calculated period 'interval'.
newidle_balance gets called when cpu is going to be idle and
rq->next_balance is updated at couple of places in this function. It
gets updated even if we don't do load_balancing due to average idle
being less than cost for migration. And since sd->last_balance is
updated only in rebalance_domain, it gets very old and stale. These
causes the rq->next_balance get updated to a value before jiffies and
inturn causes SCHED_SOFTIRQ to be raised on every tick and incurs more
power utilization than needed.
We should not be updating the rq->next_balance if average idle
less than migration cost. Subsequent newidle_balance will take care of
this if needed. Also, update sd->last_balance when we load_balance in
newidle_balance.
With this fix, we are seeing a considerable reduction in the softirqs,
especially SCHED_SOFTIRQs. Running cyclictest on a 12 core i7-1265U, we
see the following improvements:
cyclictest command: cyclictest -i 100 -d 100 -m -q -D 1m
perf command: perf stat -e "irq:softirq*" -e "timer:tick_fire" \
-e "timer:tick_handle" -e "sched:sched_softirq" \
--timeout 10000
( tick_handle is a custom trace point at tick_sched_handle(),
sched_softirq is a custome trace point in run_rebalance_domains())
Test Results
============
Idle System | With Fix | Without Fix |
---------------------------------------------------------
| highres | lowres | highres | lowres
---------------------------------------------------------
timer:tick_handle | 1421 | 1804 | 1474 | 2055
irq:softirq_raise | 2164 | 2065 | 2374 | 2964
irq:softirq_exit | 2164 | 2065 | 2374 | 2964
irq:softirq_entry | 2164 | 2065 | 2374 | 2964
sched:sched_softirq | 701 | 787 | 1049 | 1453
Cyclictest | With Fix | Without Fix |
---------------------------------------------------------
| highres | lowres | highres | lowres
---------------------------------------------------------
timer:tick_handle | 120120 | 119978 | 120120 | 119979
irq:softirq_raise | 14970 | 4340 | 19958 | 99829
irq:softirq_exit | 14970 | 4340 | 19958 | 99829
irq:softirq_entry | 14970 | 4340 | 19958 | 99829
sched:sched_softirq | 12670 | 1268 | 14408 | 97763
Youtube Fullscreen | With Fix | Without Fix |
---------------------------------------------------------
| highres | lowres | highres | lowres
---------------------------------------------------------
timer:tick_handle | 12637 | 12126 | 11653 | 12999
irq:softirq_raise | 7268 | 8440 | 12130 | 11395
irq:softirq_exit | 7268 | 8440 | 12130 | 11395
irq:softirq_entry | 7268 | 8440 | 12130 | 11395
sched:sched_softirq | 2403 | 2371 | 5742 | 7500
References:
commit 52a08ef1f13a1 ("sched: Fix the rq->next_balance logic in
rebalance_domains() and idle_balance()")
commit 8ea9183db4ad ("sched/fair: Cleanup newidle_balance")
(Also adding a sysctl knob to enable the fix dynamically for finc
testing.)
BUG=b:263289152
UPSTREAM-TASK=b:303095913
TEST=boot/cyclictest/youtube fullscreen
Signed-off-by: Vineeth Pillai <vineethrp@google.com>
Co-developed-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Change-Id: Ie984b576d07d02f94d7bb2a490263380e9594432
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/4902449
Auto-Submit: Vineeth Pillai <vineethrp@google.com>
Reviewed-by: Joel Fernandes <joelaf@google.com>
Tested-by: Vineeth Pillai <vineethrp@google.com>
Commit-Queue: Vineeth Pillai <vineethrp@google.com>
Kcr-patch: 703fa28ab29cf8765e04b04e1ef4367062617700b237c5ee8642d0fb.patch
[rebase66(tzungbi):
Squashed:
FIXUP: CHROMIUM: sched/fair: Make minimum load balance interval a tunable
(https://crrev.com/c/4919317)
]
Signed-off-by: Tzung-Bi Shih <tzungbi@chromium.org>
---
kernel/sched/fair.c | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 099a2c89a7ea2991d0433043ea161a5ba63e9759..e99ee29920ea4f46211c99fb9bf5a052378c76e0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -78,12 +78,6 @@ static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
-/*
- * The minimum load balance interval in jiffies that must pass before a
- * a periodic or nohz-idle balance happens.
- */
-static unsigned long __read_mostly sysctl_sched_min_load_balance_interval = 1UL;
-
int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
{
@@ -136,6 +130,16 @@ static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
#endif
+#ifdef CONFIG_SMP
+/*
+ * The minimum load balance interval in jiffies that must pass before a
+ * a periodic or nohz-idle balance happens.
+ */
+static unsigned long __read_mostly sysctl_sched_min_load_balance_interval = 1UL;
+
+DEFINE_STATIC_KEY_TRUE(sched_aggressive_next_balance);
+#endif
+
#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
#ifdef CONFIG_CFS_BANDWIDTH
@@ -158,6 +162,8 @@ static struct ctl_table sched_fair_sysctls[] = {
.extra1 = SYSCTL_ZERO,
},
#endif /* CONFIG_NUMA_BALANCING */
+
+#ifdef CONFIG_SMP
{
.procname = "sched_min_load_balance_interval",
.data = &sysctl_sched_min_load_balance_interval,
@@ -165,6 +171,13 @@ static struct ctl_table sched_fair_sysctls[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
+ {
+ .procname = "sched_aggressive_next_balance",
+ .data = &sched_aggressive_next_balance.key,
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
+#endif
};
static int __init sched_fair_sysctl_init(void)
@@ -12468,7 +12481,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
if (!get_rd_overloaded(this_rq->rd) ||
(sd && this_rq->avg_idle < sd->max_newidle_lb_cost)) {
- if (sd)
+ if (static_branch_likely(&sched_aggressive_next_balance) && sd)
update_next_balance(sd, &next_balance);
rcu_read_unlock();
@@ -12485,7 +12498,8 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
for_each_domain(this_cpu, sd) {
u64 domain_cost;
- update_next_balance(sd, &next_balance);
+ if (static_branch_likely(&sched_aggressive_next_balance))
+ update_next_balance(sd, &next_balance);
if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
break;
@@ -12499,6 +12513,10 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
t1 = sched_clock_cpu(this_cpu);
domain_cost = t1 - t0;
update_newidle_cost(sd, domain_cost);
+ if (!static_branch_likely(&sched_aggressive_next_balance)) {
+ sd->last_balance = jiffies;
+ update_next_balance(sd, &next_balance);
+ }
curr_cost += domain_cost;
t0 = t1;
--
2.45.1.288.g0e0cd299f1-goog