The core pick loop grew a lot of warts over time to support
optimizations. It turns out that directly doing a class pick before
entering the core-wide pick is better for readability. Make that change.

Since this is a relatively new change, keep it as a separate patch so
that it is easier to revert in case anyone reports an issue with it.
Testing shows it to be working correctly for me.

Reviewed-by: Vineeth Pillai <virem...@linux.microsoft.com>
Suggested-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Signed-off-by: Joel Fernandes (Google) <j...@joelfernandes.org>
---
 kernel/sched/core.c | 73 ++++++++++++++++-----------------------------
 1 file changed, 26 insertions(+), 47 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6aa76de55ef2..12e8e6627ab3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5180,6 +5180,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, 
struct rq_flags *rf)
        put_prev_task_balance(rq, prev, rf);
 
        smt_mask = cpu_smt_mask(cpu);
+       need_sync = !!rq->core->core_cookie;
+
+       /* reset state */
+       rq->core->core_cookie = 0UL;
+       if (rq->core->core_forceidle) {
+               need_sync = true;
+               fi_before = true;
+               rq->core->core_forceidle = false;
+       }
 
        /*
         * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq
@@ -5192,16 +5201,25 @@ pick_next_task(struct rq *rq, struct task_struct *prev, 
struct rq_flags *rf)
         * 'Fix' this by also increasing @task_seq for every pick.
         */
        rq->core->core_task_seq++;
-       need_sync = !!rq->core->core_cookie;
 
-       /* reset state */
-reset:
-       rq->core->core_cookie = 0UL;
-       if (rq->core->core_forceidle) {
+       /*
+        * Optimize for common case where this CPU has no cookies
+        * and there are no cookied tasks running on siblings.
+        */
+       if (!need_sync) {
+               for_each_class(class) {
+                       next = class->pick_task(rq);
+                       if (next)
+                               break;
+               }
+
+               if (!next->core_cookie) {
+                       rq->core_pick = NULL;
+                       goto done;
+               }
                need_sync = true;
-               fi_before = true;
-               rq->core->core_forceidle = false;
        }
+
        for_each_cpu(i, smt_mask) {
                struct rq *rq_i = cpu_rq(i);
 
@@ -5239,38 +5257,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, 
struct rq_flags *rf)
                         * core.
                         */
                        p = pick_task(rq_i, class, max);
-                       if (!p) {
-                               /*
-                                * If there weren't no cookies; we don't need to
-                                * bother with the other siblings.
-                                */
-                               if (i == cpu && !need_sync)
-                                       goto next_class;
-
+                       if (!p)
                                continue;
-                       }
-
-                       /*
-                        * Optimize the 'normal' case where there aren't any
-                        * cookies and we don't need to sync up.
-                        */
-                       if (i == cpu && !need_sync) {
-                               if (p->core_cookie) {
-                                       /*
-                                        * This optimization is only valid as
-                                        * long as there are no cookies
-                                        * involved. We may have skipped
-                                        * non-empty higher priority classes on
-                                        * siblings, which are empty on this
-                                        * CPU, so start over.
-                                        */
-                                       need_sync = true;
-                                       goto reset;
-                               }
-
-                               next = p;
-                               goto done;
-                       }
 
                        rq_i->core_pick = p;
 
@@ -5298,18 +5286,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, 
struct rq_flags *rf)
                                                cpu_rq(j)->core_pick = NULL;
                                        }
                                        goto again;
-                               } else {
-                                       /*
-                                        * Once we select a task for a cpu, we
-                                        * should not be doing an unconstrained
-                                        * pick because it might starve a task
-                                        * on a forced idle cpu.
-                                        */
-                                       need_sync = true;
                                }
                        }
                }
-next_class:;
        }
 
        rq->core->core_pick_seq = rq->core->core_task_seq;
-- 
2.29.2.299.gdc1121823c-goog

Reply via email to