Vincent reported that the first task moved to a new task group's cfs_rq will be attached in attach_task_cfs_rq() and then attached once more when it is enqueued (see https://lkml.org/lkml/2016/5/25/388).
Actually, it is worse: attach_task_cfs_rq() is invoked for a new task well before the new task is initialized in init_entity_runnable_average(). Solve this by avoiding both the attach and the detach of a new task in task_move_group_fair(). To do this, we need to know whether the task is being forked or not, so we pass this information all the way from sched_move_task() to task_move_group_fair(), which then skips detach_task_cfs_rq() and attach_task_cfs_rq() for a forked task. Reported-by: Vincent Guittot <[email protected]> Signed-off-by: Yuyang Du <[email protected]> --- kernel/sched/auto_group.c | 2 +- kernel/sched/core.c | 8 ++++---- kernel/sched/fair.c | 17 ++++++++++++----- kernel/sched/sched.h | 4 ++-- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index a5d966c..e5f0be2 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -143,7 +143,7 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) goto out; for_each_thread(p, t) - sched_move_task(t); + sched_move_task(t, 0); out: unlock_task_sighand(p, &flags); autogroup_kref_put(prev); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f2cae4..8585032 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7724,7 +7724,7 @@ void sched_offline_group(struct task_group *tg) * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to * reflect its new group. 
*/ -void sched_move_task(struct task_struct *tsk) +void sched_move_task(struct task_struct *tsk, int fork) { struct task_group *tg; int queued, running; @@ -7753,7 +7753,7 @@ void sched_move_task(struct task_struct *tsk) #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->task_move_group) - tsk->sched_class->task_move_group(tsk); + tsk->sched_class->task_move_group(tsk, fork); else #endif set_task_rq(tsk, task_cpu(tsk)); @@ -8186,7 +8186,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) static void cpu_cgroup_fork(struct task_struct *task) { - sched_move_task(task); + sched_move_task(task, 1); } static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) @@ -8213,7 +8213,7 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) struct cgroup_subsys_state *css; cgroup_taskset_for_each(task, css, tset) - sched_move_task(task); + sched_move_task(task, 0); } #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a5bdbeb..5b34286 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2970,6 +2970,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s cfs_rq_util_change(cfs_rq); } +/* Catch up with the cfs_rq and then remove our sched avgs from it */ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)), @@ -8369,7 +8370,6 @@ static void detach_task_cfs_rq(struct task_struct *p) se->vruntime -= cfs_rq->min_vruntime; } - /* Catch up with the cfs_rq and remove our load when we leave */ detach_entity_load_avg(cfs_rq, se); } @@ -8386,7 +8386,7 @@ static void attach_task_cfs_rq(struct task_struct *p) se->depth = se->parent ? 
se->parent->depth + 1 : 0; #endif - /* Synchronize task with its cfs_rq */ + /* Synchronize and attach task to its cfs_rq */ attach_entity_load_avg(cfs_rq, se); if (!vruntime_normalized(p)) @@ -8468,11 +8468,18 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) } #ifdef CONFIG_FAIR_GROUP_SCHED -static void task_move_group_fair(struct task_struct *p) +static void task_move_group_fair(struct task_struct *p, int fork) { - detach_task_cfs_rq(p); + /* + * New task does not need detach or attach (see below) + */ + if (!fork) + detach_task_cfs_rq(p); + set_task_rq(p, task_cpu(p)); - attach_task_cfs_rq(p); + + if (!fork) + attach_task_cfs_rq(p); } void free_fair_sched_group(struct task_group *tg) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 72f1f30..58b1259 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -343,7 +343,7 @@ extern void sched_online_group(struct task_group *tg, extern void sched_destroy_group(struct task_group *tg); extern void sched_offline_group(struct task_group *tg); -extern void sched_move_task(struct task_struct *tsk); +extern void sched_move_task(struct task_struct *tsk, int fork); #ifdef CONFIG_FAIR_GROUP_SCHED extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); @@ -1247,7 +1247,7 @@ struct sched_class { void (*update_curr) (struct rq *rq); #ifdef CONFIG_FAIR_GROUP_SCHED - void (*task_move_group) (struct task_struct *p); + void (*task_move_group) (struct task_struct *p, int fork); #endif }; -- 1.7.9.5

