Buddies are not very well defined with coscheduling. Usually, they
bubble up the hierarchy on a single CPU to steer task picking either
away from a certain task (yield a task: skip buddy) or towards a
certain task (yield to a task, execute a woken task: next buddy;
execute a recently preempted task: last buddy).
If we still allow buddies to bubble up the full hierarchy with
coscheduling, then for example yielding a task would always yield the
coscheduled set of tasks it is part of. If we keep effects constrained
to a coscheduled set, then one set could never preempt another set.

For now, we limit buddy activities to the scope of the leader that
does the activity, with an exception for preemption, which may operate
in the scope of a different leader. That makes yielding behavior
potentially weird and asymmetric for the time being, but it seems to
work well for preemption.

Signed-off-by: Jan H. Schönherr <jscho...@amazon.de>
---
 kernel/sched/fair.c | 51 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2227e4840355..6d64f4478fda 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3962,7 +3962,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 static void __clear_buddies_last(struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
+	for_each_owned_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		if (cfs_rq->last != se)
 			break;
@@ -3973,7 +3973,7 @@ static void __clear_buddies_last(struct sched_entity *se)
 
 static void __clear_buddies_next(struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
+	for_each_owned_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		if (cfs_rq->next != se)
 			break;
@@ -3984,7 +3984,7 @@ static void __clear_buddies_next(struct sched_entity *se)
 
 static void __clear_buddies_skip(struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
+	for_each_owned_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		if (cfs_rq->skip != se)
 			break;
@@ -4005,6 +4005,18 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		__clear_buddies_skip(se);
 }
 
+static void clear_buddies_lock(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct rq_owner_flags orf;
+
+	if (cfs_rq->last != se && cfs_rq->next != se && cfs_rq->skip != se)
+		return;
+
+	rq_lock_owned(hrq_of(cfs_rq), &orf);
+	clear_buddies(cfs_rq, se);
+	rq_unlock_owned(hrq_of(cfs_rq), &orf);
+}
+
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 
 static void
@@ -4028,7 +4040,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	update_stats_dequeue(cfs_rq, se, flags);
 
-	clear_buddies(cfs_rq, se);
+	clear_buddies_lock(cfs_rq, se);
 
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
@@ -6547,31 +6559,45 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
+	struct rq_owner_flags orf;
+	struct rq *rq;
+
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se) {
+	rq = hrq_of(cfs_rq_of(se));
+
+	rq_lock_owned(rq, &orf);
+	for_each_owned_sched_entity(se) {
 		if (SCHED_WARN_ON(!se->on_rq))
-			return;
+			break;
 		cfs_rq_of(se)->last = se;
 	}
+	rq_unlock_owned(rq, &orf);
 }
 
 static void set_next_buddy(struct sched_entity *se)
 {
+	struct rq_owner_flags orf;
+	struct rq *rq;
+
 	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
 		return;
 
-	for_each_sched_entity(se) {
+	rq = hrq_of(cfs_rq_of(se));
+
+	rq_lock_owned(rq, &orf);
+	for_each_owned_sched_entity(se) {
 		if (SCHED_WARN_ON(!se->on_rq))
-			return;
+			break;
 		cfs_rq_of(se)->next = se;
 	}
+	rq_unlock_owned(rq, &orf);
 }
 
 static void set_skip_buddy(struct sched_entity *se)
 {
-	for_each_sched_entity(se)
+	for_each_owned_sched_entity(se)
 		cfs_rq_of(se)->skip = se;
 }
 
@@ -6831,6 +6857,7 @@ static void yield_task_fair(struct rq *rq)
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se;
+	struct rq_owner_flags orf;
 
 	/*
 	 * Are we the only task in the tree?
@@ -6838,6 +6865,7 @@ static void yield_task_fair(struct rq *rq)
 	if (unlikely(rq->nr_running == 1))
 		return;
 
+	rq_lock_owned(rq, &orf);
 	clear_buddies(cfs_rq, se);
 
 	if (curr->policy != SCHED_BATCH) {
@@ -6855,21 +6883,26 @@ static void yield_task_fair(struct rq *rq)
 	}
 
 	set_skip_buddy(se);
+	rq_unlock_owned(rq, &orf);
 }
 
 static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
 {
 	struct sched_entity *se = &p->se;
+	struct rq_owner_flags orf;
 
 	/* throttled hierarchies are not runnable */
 	if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
 		return false;
 
+	rq_lock_owned(rq, &orf);
+
 	/* Tell the scheduler that we'd really like pse to run next. */
 	set_next_buddy(se);
 
 	yield_task_fair(rq);
+	rq_unlock_owned(rq, &orf);
 
 	return true;
 }
 
-- 
2.9.3.1.gcba166c.dirty