Try to improve multi-core scaling.
The changes are experimental: the fix works,
but possible regressions are unknown.

Please read the original author's paper to understand
the overall problem this patch tries to solve.

See the original patches at:
https://github.com/jplozi/wastedcores/tree/master/patches

And the related paper at:
http://www.i3s.unice.fr/~jplozi/wastedcores/

v1: the patch was rewritten for v4.9 to remove the goto statement.

v2: improved version of the initial wastedcores code. Try to take
        into account the cpu wake-up latency and pick the most recently
        idle cpu instead.
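
To make the v2 heuristic easier to review outside the diff, here is
the selection rule distilled into a standalone helper. Note that
pick_idle_cpu() is a made-up name for illustration only; in the patch
below this logic lives inline in select_task_rq_fair(), where the
caller already holds rcu_read_lock() (needed for idle_get_state()):

/*
 * Among the idle cpus the task is allowed to run on, prefer the one
 * with the lowest cpuidle exit latency (cheapest to wake up), and
 * among equals the one that went idle most recently.
 */
static int pick_idle_cpu(struct task_struct *p)
{
        unsigned int best_lat = UINT_MAX;
        u64 best_stamp = 0;
        int cpu, best_cpu = -1;

        for_each_cpu_and(cpu, cpu_online_mask, tsk_cpus_allowed(p)) {
                struct rq *rq = cpu_rq(cpu);
                struct cpuidle_state *idle = idle_get_state(rq);

                if (rq->nr_running)
                        continue;

                if (idle && idle->exit_latency < best_lat) {
                        /* Strictly lower wake-up latency always wins. */
                        best_lat = idle->exit_latency;
                        best_stamp = rq->idle_stamp;
                        best_cpu = cpu;
                } else if ((!idle || idle->exit_latency == best_lat) &&
                           rq->idle_stamp > best_stamp) {
                        /* Tie-break: the most recently idled cpu. */
                        best_stamp = rq->idle_stamp;
                        best_cpu = cpu;
                }
        }

        return best_cpu; /* -1: no idle cpu, fall back to load balancing */
}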

I have tested the code, and it does improve
multi-core performance on my machine.
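
(Exact numbers will of course vary by machine; a scheduler-heavy
workload such as

        perf bench sched messaging -g 50 -l 1000

is one quick way to check the effect for yourself.)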

Reported-by: <[email protected]>
Signed-off-by: <[email protected]>
---
 kernel/sched/fair.c | 138 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 86 insertions(+), 52 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c242944f5cbd..6569e76294d4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5630,6 +5630,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        int new_cpu = prev_cpu;
        int want_affine = 0;
        int sync = wake_flags & WF_SYNC;
+       int chk_cpu;
+       u64 idle_stamp = 0;
+       unsigned int idle_exit_lat = UINT_MAX;
+       int idle_stamp_cpu = -1;

        if (sd_flag & SD_BALANCE_WAKE) {
                record_wakee(p);
@@ -5638,69 +5642,99 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        }

        rcu_read_lock();
-       for_each_domain(cpu, tmp) {
-               if (!(tmp->flags & SD_LOAD_BALANCE))
-                       break;
+       /* fast path: use prev_cpu if it is still idle */
+       if (!cpu_rq(prev_cpu)->nr_running) {
+               new_cpu = prev_cpu;
+       } else {
+               /* Check for suitable unused cores */
+               for_each_cpu_and(chk_cpu, cpu_online_mask, tsk_cpus_allowed(p)) {
+                       struct rq *rq = cpu_rq(chk_cpu);
+                       struct cpuidle_state *idle = idle_get_state(rq);

-               /*
-                * If both cpu and prev_cpu are part of this domain,
-                * cpu is a valid SD_WAKE_AFFINE target.
-                */
-               if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
-                   cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
-                       affine_sd = tmp;
-                       break;
+                       if (!rq->nr_running) {
+                               if (idle && idle->exit_latency < idle_exit_lat) {
+                                       /* Always pick lower latency idle cpu */
+                                       idle_exit_lat = idle->exit_latency;
+                                       idle_stamp = rq->idle_stamp;
+                                       idle_stamp_cpu = chk_cpu;
+                               } else if ((!idle || idle->exit_latency == idle_exit_lat) &&
+                                       rq->idle_stamp > idle_stamp) {
+                                       /* Pick most recent idle cpu. */
+                                       idle_stamp = rq->idle_stamp;
+                                       idle_stamp_cpu = chk_cpu;
+                               }
+                       }
                }

-               if (tmp->flags & sd_flag)
-                       sd = tmp;
-               else if (!want_affine)
-                       break;
-       }
+               if (idle_stamp_cpu != -1) {
+                       /* Found suitable unused cpu */
+                       new_cpu = idle_stamp_cpu;
+               } else {
+                       /* There were no idle cpus available, must load balance. */
+                       for_each_domain(cpu, tmp) {
+                               if (!(tmp->flags & SD_LOAD_BALANCE))
+                                       break;

-       if (affine_sd) {
-               sd = NULL; /* Prefer wake_affine over balance flags */
-               if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
-                       new_cpu = cpu;
-       }
+                               /* If both cpu and prev_cpu are part of this domain,
+                                * cpu is a valid SD_WAKE_AFFINE target.
+                                */
+                               if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+                                       cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+                                               affine_sd = tmp;
+                                               break;
+                               }
+
+                               if (tmp->flags & sd_flag)
+                                       sd = tmp;
+                               else if (!want_affine)
+                                       break;
+                       }

-       if (!sd) {
-               if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
-                       new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+                       if (affine_sd) {
+                               sd = NULL; /* Prefer wake_affine over balance flags */
+                               if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
+                                       new_cpu = cpu;
+                       }

-       } else while (sd) {
-               struct sched_group *group;
-               int weight;
+                       if (!sd) {
+                               if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+                                       new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);

-               if (!(sd->flags & sd_flag)) {
-                       sd = sd->child;
-                       continue;
-               }
+                       } else while (sd) {
+                               struct sched_group *group;
+                               int weight;

-               group = find_idlest_group(sd, p, cpu, sd_flag);
-               if (!group) {
-                       sd = sd->child;
-                       continue;
-               }
+                               if (!(sd->flags & sd_flag)) {
+                                       sd = sd->child;
+                                       continue;
+                               }

-               new_cpu = find_idlest_cpu(group, p, cpu);
-               if (new_cpu == -1 || new_cpu == cpu) {
-                       /* Now try balancing at a lower domain level of cpu */
-                       sd = sd->child;
-                       continue;
-               }
+                               group = find_idlest_group(sd, p, cpu, sd_flag);
+                               if (!group) {
+                                       sd = sd->child;
+                                       continue;
+                               }

-               /* Now try balancing at a lower domain level of new_cpu */
-               cpu = new_cpu;
-               weight = sd->span_weight;
-               sd = NULL;
-               for_each_domain(cpu, tmp) {
-                       if (weight <= tmp->span_weight)
-                               break;
-                       if (tmp->flags & sd_flag)
-                               sd = tmp;
+                               new_cpu = find_idlest_cpu(group, p, cpu);
+                               if (new_cpu == -1 || new_cpu == cpu) {
+                                       /* Now try balancing at a lower domain level of cpu */
+                                       sd = sd->child;
+                                       continue;
+                               }
+
+                               /* Now try balancing at a lower domain level of new_cpu */
+                               cpu = new_cpu;
+                               weight = sd->span_weight;
+                               sd = NULL;
+                               for_each_domain(cpu, tmp) {
+                                       if (weight <= tmp->span_weight)
+                                               break;
+                                       if (tmp->flags & sd_flag)
+                                               sd = tmp;
+                               }
+                               /* while loop will break here if sd == NULL */
+                       }
                }
-               /* while loop will break here if sd == NULL */
        }
        rcu_read_unlock();

--
2.12.0
---
I'm fairly new to kernel development, so please be nice.
I hope this email and the attached patch are useful. I did not write
the original code, but I have tinkered with it quite a lot as an
experiment. The attached patch is the result of that tinkering.
The code is most probably incorrect and may have side effects or
regressions. However, I have run multiple kernels with this patch
applied and it never caused any severe problems.


