With hierarchical runqueues and locks at each level, it is often necessary to acquire locks at different levels. Introduce the second of two locking strategies, which is suitable for progressing upwards through the hierarchy with minimal impact on lock contention.
During enqueuing and dequeuing, a scheduling entity is recursively added or removed from the runqueue hierarchy. This is an upwards only progression through the hierarchy, which does not care about which CPU is responsible for a certain part of the hierarchy. Hence, it is not necessary to hold on to a lock of a lower hierarchy level once we moved on to a higher one. Introduce rq_chain_init(), rq_chain_lock(), and rq_chain_unlock(), which implement lock chaining, where the previous lock is only released after we acquired the next one, so that concurrent operations cannot overtake each other. The functions can be used even when parts have already been locked via rq_lock_owned(), as for example dequeueing might happen during task selection if a runqueue is throttled. Signed-off-by: Jan H. Schönherr <jscho...@amazon.de> --- kernel/sched/cosched.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 14 +++++++++++++ 2 files changed, 67 insertions(+) diff --git a/kernel/sched/cosched.c b/kernel/sched/cosched.c index df62ee6d0520..f2d51079b3db 100644 --- a/kernel/sched/cosched.c +++ b/kernel/sched/cosched.c @@ -608,3 +608,56 @@ void rq_unlock_owned(struct rq *rq, struct rq_owner_flags *orf) rq_unlock(rq, &rq->sdrq_data.rf); } } + +void rq_chain_init(struct rq_chain *rc, struct rq *rq) +{ + bool parent_locked = rq->sdrq_data.parent_locked; + + WARN_ON_ONCE(!irqs_disabled()); + lockdep_assert_held(&rq->lock); + + rq = parent_rq(rq); + while (parent_locked) { + lockdep_assert_held(&rq->lock); + parent_locked = rq->sdrq_data.parent_locked; + rq = parent_rq(rq); + } + + rc->next = rq; + rc->curr = NULL; +} + +void rq_chain_unlock(struct rq_chain *rc) +{ + if (rc->curr) + rq_unlock(rc->curr, &rc->rf); +} + +void rq_chain_lock(struct rq_chain *rc, struct sched_entity *se) +{ + struct cfs_rq *cfs_rq = se->cfs_rq; + struct rq *rq = cfs_rq->rq; + + if (!is_sd_se(se) || rc->curr == rq) { + lockdep_assert_held(&rq->lock); + return; + } + + if (rq == 
rc->next) { + struct rq_flags rf = rc->rf; + + /* Get the new lock (and release previous lock afterwards) */ + rq_lock(rq, &rc->rf); + + if (rc->curr) { + lockdep_assert_held(&rc->curr->lock); + rq_unlock(rc->curr, &rf); + } + + rc->curr = rq; + rc->next = parent_rq(rq); + + /* FIXME: Only update clock, when necessary */ + update_rq_clock(rq); + } +} diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7dba8fdc48c7..48939c8e539d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -513,6 +513,14 @@ struct rq_owner_flags { #endif }; +struct rq_chain { +#ifdef CONFIG_COSCHEDULING + struct rq *next; + struct rq *curr; + struct rq_flags rf; +#endif +}; + #ifdef CONFIG_COSCHEDULING struct sdrq_data { /* @@ -1206,6 +1214,9 @@ void cosched_online_group(struct task_group *tg); void cosched_offline_group(struct task_group *tg); struct rq *rq_lock_owned(struct rq *rq, struct rq_owner_flags *orf); void rq_unlock_owned(struct rq *rq, struct rq_owner_flags *orf); +void rq_chain_init(struct rq_chain *rc, struct rq *rq); +void rq_chain_unlock(struct rq_chain *rc); +void rq_chain_lock(struct rq_chain *rc, struct sched_entity *se); #else /* !CONFIG_COSCHEDULING */ static inline void cosched_init_bottom(void) { } static inline void cosched_init_topology(void) { } @@ -1217,6 +1228,9 @@ static inline void cosched_online_group(struct task_group *tg) { } static inline void cosched_offline_group(struct task_group *tg) { } static inline struct rq *rq_lock_owned(struct rq *rq, struct rq_owner_flags *orf) { return rq; } static inline void rq_unlock_owned(struct rq *rq, struct rq_owner_flags *orf) { } +static inline void rq_chain_init(struct rq_chain *rc, struct rq *rq) { } +static inline void rq_chain_unlock(struct rq_chain *rc) { } +static inline void rq_chain_lock(struct rq_chain *rc, struct sched_entity *se) { } #endif /* !CONFIG_COSCHEDULING */ #ifdef CONFIG_SCHED_SMT -- 2.9.3.1.gcba166c.dirty