| From ddf68d97a516940633c404a61aefe7715f11fbdd Mon Sep 17 00:00:00 2001 |
| From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> |
| Date: Fri, 26 Mar 2021 16:19:20 -0700 |
| Subject: [PATCH] FROMLIST: sched/fair: Consider SMT in ASYM_PACKING load |
| balance |
| |
| When deciding to pull tasks in ASYM_PACKING, it is necessary not only to |
| check for the idle state of the destination CPU, dst_cpu, but also of |
| its SMT siblings. |
| |
| If dst_cpu is idle but its SMT siblings are busy, performance suffers |
| if it pulls tasks from a medium priority CPU that does not have SMT |
| siblings. |
| |
| Implement asym_smt_can_pull_tasks() to inspect the state of the SMT |
| siblings of both dst_cpu and the CPUs in the candidate busiest group. |
| |
| Cc: Aubrey Li <aubrey.li@intel.com> |
| Cc: Ben Segall <bsegall@google.com> |
| Cc: Daniel Bristot de Oliveira <bristot@redhat.com> |
| Cc: Dietmar Eggemann <dietmar.eggemann@arm.com> |
| Cc: Mel Gorman <mgorman@suse.de> |
| Cc: Quentin Perret <qperret@google.com> |
| Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com> |
| Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> |
| Cc: Steven Rostedt <rostedt@goodmis.org> |
| Cc: Tim Chen <tim.c.chen@linux.intel.com> |
| Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org> |
| Reviewed-by: Len Brown <len.brown@intel.com> |
| Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> |
| (am from https://lore.kernel.org/patchwork/patch/1474506/) |
| |
| TOREVERT=b:192050150 |
| BUG=b:179699891 |
| TEST=basic boot on brya and check /proc/sys/kernel/sched_itmt_enabled |
| should be 1 |
| TEST=spin N+1 threads where N = number of physical cores - expect |
| N threads to run on N physical cores and the N+1st thread |
| to run on an atom CPU |
| |
| Change-Id: I2840e8087f6b3b74b3e40879a80e232e91483535 |
| |
| --- |
| Changes since v3: |
| * Removed the arch_asym_check_smt_siblings() hook. Discussions with the |
| powerpc folks showed that this patch should not impact them. Also, more |
| recent powerpc processors no longer use asym_packing. (PeterZ) |
| * Removed unnecessary local variable in asym_can_pull_tasks(). (Dietmar) |
| * Removed unnecessary check for local CPUs when the local group has zero |
| utilization. (Joel) |
| * Renamed asym_can_pull_tasks() as asym_smt_can_pull_tasks() to reflect |
| the fact that it deals with SMT cases. |
| * Made asym_smt_can_pull_tasks() return false for !CONFIG_SCHED_SMT so |
| that callers can deal with non-SMT cases. |
| |
| Changes since v2: |
| * Reworded the commit message to reflect updates in code. |
| * Corrected misrepresentation of dst_cpu as the CPU doing the load |
| balancing. (PeterZ) |
| * Removed call to arch_asym_check_smt_siblings() as it is now called in |
| sched_asym(). |
| |
| Changes since v1: |
| * Don't bailout in update_sd_pick_busiest() if dst_cpu cannot pull |
| tasks. Instead, reclassify the candidate busiest group, as it |
| may still be selected. (PeterZ) |
| * Avoid an expensive and unnecessary call to cpumask_weight() when |
| determining if a sched_group is comprised of SMT siblings. |
| (PeterZ). |
| |
| Signed-off-by: George D Sworo <george.d.sworo@intel.com> |
| Change-Id: Id0072bea20d3ac2b551c84b54a5b9327f2bde4b4 |
| Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2953070 |
| Commit-Queue: Alex Levin <levinale@google.com> |
| Tested-by: George D Sworo <george.d.sworo@intel.corp-partner.google.com> |
| Reviewed-by: Alex Levin <levinale@google.com> |
| Reviewed-by: Joel Fernandes <joelaf@google.com> |
| --- |
| kernel/sched/fair.c | 95 +++++++++++++++++++++++++++++++++++++++++++++ |
| 1 file changed, 95 insertions(+) |
| |
| diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c |
| --- a/kernel/sched/fair.c |
| +++ b/kernel/sched/fair.c |
| @@ -8534,10 +8534,99 @@ group_type group_classify(unsigned int imbalance_pct, |
| return group_has_spare; |
| } |
| |
| +/** |
| + * asym_smt_can_pull_tasks - Check whether the load balancing CPU can pull tasks |
| + * @dst_cpu: Destination CPU of the load balancing |
| + * @sds: Load-balancing data with statistics of the local group |
| + * @sgs: Load-balancing statistics of the candidate busiest group |
| + * @sg: The candidate busiest group |
| + * |
| + * Check the state of the SMT siblings of both @sds::local and @sg and decide |
| + * if @dst_cpu can pull tasks. If @dst_cpu does not have SMT siblings, it can |
| + * pull tasks if two or more of the SMT siblings of @sg are busy. If only one |
| + * CPU in @sg is busy, pull tasks only if @dst_cpu has higher priority. |
| + * |
| + * If both @dst_cpu and @sg have SMT siblings, even the number of idle CPUs |
| + * between @sds::local and @sg. Thus, pull tasks from @sg if the difference |
| + * between the number of busy CPUs is 2 or more. If the difference is 1, |
| + * only pull if @dst_cpu has higher priority. If @sg does not have SMT siblings, |
| + * only pull tasks if all of the SMT siblings of @dst_cpu are idle and @sg |
| + * has lower priority. |
| + */ |
| +static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds, |
| + struct sg_lb_stats *sgs, |
| + struct sched_group *sg) |
| +{ |
| +#ifdef CONFIG_SCHED_SMT |
| + bool local_is_smt, sg_is_smt; |
| + int sg_busy_cpus; |
| + |
| + local_is_smt = sds->local->flags & SD_SHARE_CPUCAPACITY; |
| + sg_is_smt = sg->flags & SD_SHARE_CPUCAPACITY; |
| + |
| + sg_busy_cpus = sgs->group_weight - sgs->idle_cpus; |
| + |
| + if (!local_is_smt) { |
| + /* |
| + * If we are here, @dst_cpu is idle and does not have SMT |
| + * siblings. Pull tasks if candidate group has two or more |
| + * busy CPUs. |
| + */ |
| + if (sg_is_smt && sg_busy_cpus >= 2) |
| + return true; |
| + |
| + /* |
| + * @dst_cpu does not have SMT siblings. @sg may have SMT |
| + * siblings and only one is busy. In such case, @dst_cpu |
| + * can help if it has higher priority and is idle. |
| + */ |
| + return !sds->local_stat.group_util && |
| + sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu); |
| + } |
| + |
| + /* @dst_cpu has SMT siblings. */ |
| + |
| + if (sg_is_smt) { |
| + int local_busy_cpus = sds->local->group_weight - |
| + sds->local_stat.idle_cpus; |
| + int busy_cpus_delta = sg_busy_cpus - local_busy_cpus; |
| + |
| + /* Local can always help to even the number of busy CPUs. */ |
| + if (busy_cpus_delta >= 2) |
| + return true; |
| + |
| + if (busy_cpus_delta == 1) |
| + return sched_asym_prefer(dst_cpu, |
| + sg->asym_prefer_cpu); |
| + |
| + return false; |
| + } |
| + |
| + /* |
| + * @sg does not have SMT siblings. Ensure that @sds::local does not end |
| + * up with more than one busy SMT sibling and only pull tasks if there |
| + * are no busy CPUs. As CPUs move in and out of idle state frequently, |
| + * also check the group utilization to smooth the decision. |
| + */ |
| + if (!sds->local_stat.group_util) |
| + return sched_asym_prefer(dst_cpu, sg->asym_prefer_cpu); |
| + |
| + return false; |
| +#else |
| + /* Always return false so that callers deal with non-SMT cases. */ |
| + return false; |
| +#endif |
| +} |
| + |
| static inline bool |
| sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs, |
| struct sched_group *group) |
| { |
| + /* Only do SMT checks if either local or candidate have SMT siblings */ |
| + if ((sds->local->flags & SD_SHARE_CPUCAPACITY) || |
| + (group->flags & SD_SHARE_CPUCAPACITY)) |
| + return asym_smt_can_pull_tasks(env->dst_cpu, sds, sgs, group); |
| + |
| return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu); |
| } |
| |
| @@ -9543,6 +9632,12 @@ static struct rq *find_busiest_queue(struct lb_env *env, |
| nr_running == 1) |
| continue; |
| |
| + /* Make sure we only pull tasks from a CPU of lower priority */ |
| + if ((env->sd->flags & SD_ASYM_PACKING) && |
| + sched_asym_prefer(i, env->dst_cpu) && |
| + nr_running == 1) |
| + continue; |
| + |
| switch (env->migration_type) { |
| case migrate_load: |
| /* |
| -- |
| 2.33.0.259.gc128427fd7-goog |
| |