On 6/12/19 9:32 PM, Rik van Riel wrote: > Use an explicit "cfs_rq of parent sched_entity" helper in a few > strategic places, where cfs_rq_of(se) may no longer point at the > right runqueue once we flatten the hierarchical cgroup runqueues. > > No functional change. > > Signed-off-by: Rik van Riel <r...@surriel.com> > --- > kernel/sched/fair.c | 17 +++++++++++++---- > 1 file changed, 13 insertions(+), 4 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index dcc521d251e3..c6ede2ecc935 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -275,6 +275,15 @@ static inline struct cfs_rq *group_cfs_rq(struct > sched_entity *grp) > return grp->my_q; > } > > +/* runqueue owned by the parent entity */ > +static inline struct cfs_rq *group_cfs_rq_of_parent(struct sched_entity *se) > +{ > + if (se->parent) > + return group_cfs_rq(se->parent); > + > + return &cfs_rq_of(se)->rq->cfs; > +} > + > static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) > { > struct rq *rq = rq_of(cfs_rq); > @@ -3298,7 +3307,7 @@ static inline int propagate_entity_load_avg(struct > sched_entity *se) > > gcfs_rq->propagate = 0; > > - cfs_rq = cfs_rq_of(se); > + cfs_rq = group_cfs_rq_of_parent(se); > > add_tg_cfs_propagate(cfs_rq, gcfs_rq->prop_runnable_sum); > > @@ -7779,7 +7788,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) > > WRITE_ONCE(cfs_rq->h_load_next, NULL); > for_each_sched_entity(se) { > - cfs_rq = cfs_rq_of(se); > + cfs_rq = group_cfs_rq_of_parent(se);
Why do you change this here? task_se_h_load() calls update_cfs_rq_h_load() with cfs_rq = group_cfs_rq_of_parent(se) because the task might not be on the cfs_rq yet. But inside update_cfs_rq_h_load() the first se is derived from cfs_rq->tg->se[cpu_of(rq)], so in the first iteration of the for_each_sched_entity() loop we should still start with group_cfs_rq() (se->my_q)? The system doesn't barf with these two WARN_ON()s added: @@ -7663,12 +7673,17 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) unsigned long now = jiffies; unsigned long load; + WARN_ON(se && (se != group_cfs_rq(se)->tg->se[cpu_of(rq)])); + if (cfs_rq->last_h_load_update == now) return; WRITE_ONCE(cfs_rq->h_load_next, NULL); for_each_sched_entity(se) { cfs_rq = group_cfs_rq_of_parent(se); + + WARN_ON(se != group_cfs_rq(se)->tg->se[cpu_of(rq)]); + WRITE_ONCE(cfs_rq->h_load_next, se); if (cfs_rq->last_h_load_update == now) break; [...]