On Tue, Apr 02, 2019 at 10:28:12AM +0200, Peter Zijlstra wrote: > Another approach would be something like the below: > > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -87,7 +87,7 @@ static inline int __task_prio(struct tas > */ > > /* real prio, less is less */ > -static inline bool __prio_less(struct task_struct *a, struct task_struct *b, > bool runtime) > +static inline bool __prio_less(struct task_struct *a, struct task_struct *b, > u64 vruntime) > { > int pa = __task_prio(a), pb = __task_prio(b); > > @@ -104,21 +104,25 @@ static inline bool __prio_less(struct ta > if (pa == -1) /* dl_prio() doesn't work because of stop_class above */ > return !dl_time_before(a->dl.deadline, b->dl.deadline); > > - if (pa == MAX_RT_PRIO + MAX_NICE && runtime) /* fair */ > - return !((s64)(a->se.vruntime - b->se.vruntime) < 0); > + if (pa == MAX_RT_PRIO + MAX_NICE) /* fair */ > + return !((s64)(a->se.vruntime - vruntime) < 0); > > return false; > } > > static inline bool cpu_prio_less(struct task_struct *a, struct task_struct > *b) > { > - return __prio_less(a, b, true); > + return __prio_less(a, b, b->se.vruntime); > } > > static inline bool core_prio_less(struct task_struct *a, struct task_struct > *b) > { > - /* cannot compare vruntime across CPUs */ > - return __prio_less(a, b, false); > + u64 vruntime = b->se.vruntime; > + > + vruntime -= task_rq(b)->cfs.min_vruntime; > + vruntime += task_rq(a)->cfs.min_vruntime > + > + return __prio_less(a, b, vruntime); > } > > static inline bool __sched_core_less(struct task_struct *a, struct > task_struct *b)
Brilliant, I like this approach; it makes core_prio_less() work across CPUs. I tested this together with changing cpu_prio_less(max, class_pick) to core_prio_less(max, class_pick) in pick_task(), and the problem is gone :-) I verified this with the debug code below: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cb24a0141e57..50658e79363f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3832,6 +3832,14 @@ next_class:; WARN_ON_ONCE(!rq_i->core_pick); + if (rq->core->core_cookie && rq_i->core_pick->core_cookie && + rq->core->core_cookie != rq_i->core_pick->core_cookie) { + trace_printk("expect 0x%lx, cpu%d got 0x%lx\n", + rq->core->core_cookie, i, + rq_i->core_pick->core_cookie); + WARN_ON_ONCE(1); + } + rq_i->core_pick->core_occupation = occ; if (i == cpu) -- 2.19.1.3.ge56e4f7