[tip:sched/core] sched/fair: Remove double_lock_balance() from load_balance()
Commit-ID:  163122b7fcfa28c0e4a838fcc8043c616746802e
Gitweb:     http://git.kernel.org/tip/163122b7fcfa28c0e4a838fcc8043c616746802e
Author:     Kirill Tkhai <ktk...@parallels.com>
AuthorDate: Wed, 20 Aug 2014 13:48:29 +0400
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 20 Aug 2014 14:53:05 +0200

sched/fair: Remove double_lock_balance() from load_balance()

Avoid double_rq_lock() and use TASK_ON_RQ_MIGRATING for load_balance().

The advantage is (obviously) not holding two rq->lock's at the same time
and thereby increasing parallelism.

Further note that if there was no task to migrate we will not have
acquired the second rq->lock at all.

The important point to note is that because we acquire dst->lock
immediately after releasing src->lock the potential wait time of
task_rq_lock() callers on TASK_ON_RQ_MIGRATING is not longer than it
would have been in the double rq lock scenario.

Signed-off-by: Kirill Tkhai <ktk...@parallels.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Paul Turner <p...@google.com>
Cc: Oleg Nesterov <o...@redhat.com>
Cc: Steven Rostedt <rost...@goodmis.org>
Cc: Mike Galbraith <umgwanakikb...@gmail.com>
Cc: Kirill Tkhai <tk...@yandex.ru>
Cc: Tim Chen <tim.c.c...@linux.intel.com>
Cc: Nicolas Pitre <nicolas.pi...@linaro.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Link: http://lkml.kernel.org/r/1408528109.23412.94.camel@tkhai
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 kernel/sched/fair.c | 151 ++--
 1 file changed, 99 insertions(+), 52 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7e5cf05..d3427a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4709,7 +4709,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		return;
 
 	/*
-	 * This is possible from callers such as move_task(), in which we
+	 * This is possible from callers such as attach_tasks(), in which we
 	 * unconditionally check_prempt_curr() after an enqueue (which may have
 	 * lead to a throttle). This both saves work and prevents false
 	 * next-buddy nomination below.
@@ -5117,21 +5117,10 @@ struct lb_env {
 	unsigned int		loop_max;
 
 	enum fbq_type		fbq_type;
+	struct list_head	tasks;
 };
 
 /*
- * move_task - move a task from one runqueue to another runqueue.
- * Both runqueues must be locked.
- */
-static void move_task(struct task_struct *p, struct lb_env *env)
-{
-	deactivate_task(env->src_rq, p, 0);
-	set_task_cpu(p, env->dst_cpu);
-	activate_task(env->dst_rq, p, 0);
-	check_preempt_curr(env->dst_rq, p, 0);
-}
-
-/*
  * Is this task likely cache-hot:
  */
 static int task_hot(struct task_struct *p, struct lb_env *env)
@@ -5346,6 +5335,18 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 }
 
 /*
+ * detach_task() -- detach the task for the migration specified in env
+ */
+static void detach_task(struct task_struct *p, struct lb_env *env)
+{
+	lockdep_assert_held(&env->src_rq->lock);
+
+	deactivate_task(env->src_rq, p, 0);
+	p->on_rq = TASK_ON_RQ_MIGRATING;
+	set_task_cpu(p, env->dst_cpu);
+}
+
+/*
  * detach_one_task() -- tries to dequeue exactly one task from env->src_rq, as
  * part of active balancing operations within "domain".
  *
@@ -5361,15 +5362,13 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 		if (!can_migrate_task(p, env))
 			continue;
 
-		deactivate_task(env->src_rq, p, 0);
-		p->on_rq = TASK_ON_RQ_MIGRATING;
-		set_task_cpu(p, env->dst_cpu);
+		detach_task(p, env);
 
 		/*
 		 * Right now, this is only the second place where
-		 * lb_gained[env->idle] is updated (other is move_tasks)
+		 * lb_gained[env->idle] is updated (other is detach_tasks)
 		 * so we can safely collect stats here rather than
-		 * inside move_tasks().
+		 * inside detach_tasks().
 		 */
 		schedstat_inc(env->sd, lb_gained[env->idle]);
 		return p;
@@ -5377,35 +5376,22 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 	return NULL;
 }
 
-/*
- * attach_one_task() -- attaches the task returned from detach_one_task() to
- * its new rq.
- */
-static void attach_one_task(struct rq *rq, struct task_struct *p)
-{
-	raw_spin_lock(&rq->lock);
-	BUG_ON(task_rq(p) != rq);
-	p->on_rq = TASK_ON_RQ_QUEUED;
-	activate_task(rq, p, 0);
-	check_preempt_curr(rq, p, 0);
-	raw_spin_unlock(&rq->lock);
-}
-
 static const unsigned int sched_nr_migrate_break = 32;
 
 /*
- * move_tasks tries to move up to imbalance weighted load from busiest to
- * this_rq, as part of a balancing operation within domain "sd".
- * Returns 1 if successful and 0 otherwise.
+ * detach_tasks() -- tries to detach up to imbalance weighted load from
+ * busiest_rq, as part of a balancing operation within domain "sd".
  *
- * Called with both runqueues locked.
+ * Returns number of detached tasks if successful and 0 otherwise.
 */
-static int move_tasks(struct
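[The diff is truncated in the archive.]

To make the locking change concrete, here is a minimal user-space sketch
(plain pthreads, not kernel code) of the pattern the changelog describes:
mark the task as migrating, detach it under the source lock, drop that lock,
take the destination lock, then attach and clear the flag.  All names here
(struct queue, migrate(), task_queue_lock()) are made up for illustration,
and the memory-ordering details the kernel handles are omitted.

	/* Build with: cc -pthread sketch.c */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct queue { pthread_mutex_t lock; int nr; };
	struct task  { struct queue *q; volatile bool migrating; };

	static void migrate(struct task *t, struct queue *src, struct queue *dst)
	{
		pthread_mutex_lock(&src->lock);
		t->migrating = true;	/* analogue of TASK_ON_RQ_MIGRATING  */
		src->nr--;		/* "detach": dequeue from src        */
		t->q = dst;		/* analogue of set_task_cpu()        */
		pthread_mutex_unlock(&src->lock);

		/* Neither lock is held here; only the flag covers the task. */

		pthread_mutex_lock(&dst->lock);
		dst->nr++;		/* "attach": enqueue on dst          */
		t->migrating = false;
		pthread_mutex_unlock(&dst->lock);
	}

	/* Analogue of task_rq_lock(): lock the queue the task is stable on. */
	static struct queue *task_queue_lock(struct task *t)
	{
		for (;;) {
			struct queue *q = t->q;

			pthread_mutex_lock(&q->lock);
			if (q == t->q && !t->migrating)
				return q;	/* stable; caller may proceed */
			pthread_mutex_unlock(&q->lock);
			/*
			 * Task is in flight.  Since dst->lock is taken right
			 * after src->lock is dropped, this wait is no longer
			 * than it would have been with double_rq_lock().
			 */
		}
	}

	int main(void)
	{
		struct queue a = { PTHREAD_MUTEX_INITIALIZER, 1 };
		struct queue b = { PTHREAD_MUTEX_INITIALIZER, 0 };
		struct task  t = { &a, false };

		migrate(&t, &a, &b);
		pthread_mutex_unlock(task_queue_lock(&t));
		printf("a=%d b=%d\n", a.nr, b.nr);	/* a=0 b=1 */
		return 0;
	}

In the actual patch, detach_tasks() collects the detached tasks on the new
env->tasks list under the source rq->lock, and attach_tasks() later enqueues
them under the destination rq->lock; the sketch collapses that to a single
task to keep the lock hand-off visible.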