Reducing the overhead of the CPU controller is achieved by not walking all the sched_entities every time a task is enqueued or dequeued.
One of the things being checked every single time is whether the cfs_rq
is on the rq->leaf_cfs_rq_list.

By only removing a cfs_rq from the list once it no longer has children
on the list, we can avoid walking the sched_entity hierarchy if the bottom
cfs_rq is on the list, once the runqueues have been flattened.

Signed-off-by: Rik van Riel <r...@surriel.com>
Suggested-by: Vincent Guittot <vincent.guit...@linaro.org>
---
Review note: child_cfs_rq_on_list() now returns false when ->prev is the
rq's list head instead of running container_of() on it. Hunk line counts
and the diffstat are updated to match; the index hashes are left as
originally posted.

 kernel/sched/fair.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a48d0dbfc232..04b216234265 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -369,6 +369,52 @@ static inline void assert_list_leaf_cfs_rq(struct rq *rq)
 	SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
 }
 
+/*
+ * Because list_add_leaf_cfs_rq always places a child cfs_rq on the list
+ * immediately before a parent cfs_rq, and cfs_rqs are removed from the list
+ * bottom-up, we only have to test whether the cfs_rq before us on the list
+ * is our child.
+ *
+ * Returns true if the cfs_rq preceding @cfs_rq on the list belongs to a
+ * child task group of @cfs_rq's task group; false otherwise, including
+ * when @cfs_rq is the first entry on the list.
+ */
+static inline bool child_cfs_rq_on_list(struct cfs_rq *cfs_rq)
+{
+	struct cfs_rq *prev_cfs_rq;
+	struct list_head *prev;
+
+	prev = cfs_rq->leaf_cfs_rq_list.prev;
+
+	/*
+	 * The first cfs_rq on the list has the list head embedded in struct
+	 * rq as its ->prev. That is not a cfs_rq, so container_of() on it
+	 * would yield a bogus pointer; no child can precede us in that case.
+	 */
+	if (prev == &rq_of(cfs_rq)->leaf_cfs_rq_list)
+		return false;
+
+	prev_cfs_rq = container_of(prev, struct cfs_rq, leaf_cfs_rq_list);
+
+	return (prev_cfs_rq->tg->parent == cfs_rq->tg);
+}
+
+/*
+ * Remove a cfs_rq from the list if it has no children on the list.
+ * The scheduler iterates over the list regularly; if conditions for
+ * removal are still true, we'll get to this cfs_rq in the future.
+ */
+static inline void list_del_leaf_cfs_rq_bottom(struct cfs_rq *cfs_rq)
+{
+	if (!cfs_rq->on_list)
+		return;
+
+	if (child_cfs_rq_on_list(cfs_rq))
+		return;
+
+	list_del_leaf_cfs_rq(cfs_rq);
+}
+
 /* Iterate thr' all leaf cfs_rq's on a runqueue */
 #define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)			\
 	list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,	\
@@ -7723,7 +7769,7 @@ static void update_blocked_averages(int cpu)
 		 * decayed cfs_rqs linger on the list.
 		 */
 		if (cfs_rq_is_decayed(cfs_rq))
-			list_del_leaf_cfs_rq(cfs_rq);
+			list_del_leaf_cfs_rq_bottom(cfs_rq);
 
 		/* Don't need periodic decay once load/util_avg are null */
 		if (cfs_rq_has_blocked(cfs_rq))
-- 
2.20.1