[tip:sched/core] sched/fair: Remove double_lock_balance() from load_balance()

2014-08-20 Thread tip-bot for Kirill Tkhai
Commit-ID:  163122b7fcfa28c0e4a838fcc8043c616746802e
Gitweb: http://git.kernel.org/tip/163122b7fcfa28c0e4a838fcc8043c616746802e
Author: Kirill Tkhai 
AuthorDate: Wed, 20 Aug 2014 13:48:29 +0400
Committer:  Ingo Molnar 
CommitDate: Wed, 20 Aug 2014 14:53:05 +0200

sched/fair: Remove double_lock_balance() from load_balance()

Avoid double_rq_lock() and use TASK_ON_RQ_MIGRATING for
load_balance(). The advantage is (obviously) not holding two
rq->lock's at the same time and thereby increasing parallelism.

Further note that if there was no task to migrate we will not
have acquired the second rq->lock at all.
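
A minimal sketch of the resulting control flow in load_balance() -- not the
literal hunk; the local names (busiest, flags, cur_ld_moved, ld_moved) are
assumed here -- using only the detach_tasks()/attach_tasks() helpers this
patch introduces:

    raw_spin_lock_irqsave(&busiest->lock, flags);
    /* Dequeue candidates, mark them TASK_ON_RQ_MIGRATING, park on env.tasks. */
    cur_ld_moved = detach_tasks(&env);
    /* Only the src rq->lock was held so far; drop it before touching dst. */
    raw_spin_unlock(&busiest->lock);

    if (cur_ld_moved) {
            /* Takes env.dst_rq->lock, requeues each task, drops the lock. */
            attach_tasks(&env);
            ld_moved += cur_ld_moved;
    }
    local_irq_restore(flags);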

The important point to note is that because we acquire dst->lock
immediately after releasing src->lock the potential wait time of
task_rq_lock() callers on TASK_ON_RQ_MIGRATING is not longer
than it would have been in the double rq lock scenario.
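
The bound above relies on how rq->lock takers treat a migrating task. A
simplified sketch of the task_rq_lock()-style wait loop from the companion
TASK_ON_RQ_MIGRATING patches (pi_lock and irq handling omitted, not copied
verbatim from kernel/sched/core.c):

    for (;;) {
            struct rq *rq = task_rq(p);

            raw_spin_lock(&rq->lock);
            if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
                    return rq;      /* p is settled on this rq */
            raw_spin_unlock(&rq->lock);

            /* Spin only across the short src-unlock -> dst-lock window. */
            while (unlikely(task_on_rq_migrating(p)))
                    cpu_relax();
    }

The spin covers exactly the window described above, so a waiter is never
delayed longer than it would have been waiting on double_rq_lock().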

Signed-off-by: Kirill Tkhai 
Cc: Peter Zijlstra 
Cc: Paul Turner 
Cc: Oleg Nesterov 
Cc: Steven Rostedt 
Cc: Mike Galbraith 
Cc: Kirill Tkhai 
Cc: Tim Chen 
Cc: Nicolas Pitre 
Cc: Linus Torvalds 
Link: http://lkml.kernel.org/r/1408528109.23412.94.camel@tkhai
Signed-off-by: Ingo Molnar 
---
 kernel/sched/fair.c | 151 ++--
 1 file changed, 99 insertions(+), 52 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7e5cf05..d3427a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4709,7 +4709,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
 
/*
-* This is possible from callers such as move_task(), in which we
+* This is possible from callers such as attach_tasks(), in which we
 * unconditionally check_prempt_curr() after an enqueue (which may have
 * lead to a throttle).  This both saves work and prevents false
 * next-buddy nomination below.
@@ -5117,21 +5117,10 @@ struct lb_env {
	unsigned int		loop_max;
 
enum fbq_type   fbq_type;
+	struct list_head	tasks;
 };
 
 /*
- * move_task - move a task from one runqueue to another runqueue.
- * Both runqueues must be locked.
- */
-static void move_task(struct task_struct *p, struct lb_env *env)
-{
-   deactivate_task(env->src_rq, p, 0);
-   set_task_cpu(p, env->dst_cpu);
-   activate_task(env->dst_rq, p, 0);
-   check_preempt_curr(env->dst_rq, p, 0);
-}
-
-/*
  * Is this task likely cache-hot:
  */
 static int task_hot(struct task_struct *p, struct lb_env *env)
@@ -5346,6 +5335,18 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 }
 
 /*
+ * detach_task() -- detach the task for the migration specified in env
+ */
+static void detach_task(struct task_struct *p, struct lb_env *env)
+{
+	lockdep_assert_held(&env->src_rq->lock);
+
+   deactivate_task(env->src_rq, p, 0);
+   p->on_rq = TASK_ON_RQ_MIGRATING;
+   set_task_cpu(p, env->dst_cpu);
+}
+
+/*
  * detach_one_task() -- tries to dequeue exactly one task from env->src_rq, as
  * part of active balancing operations within "domain".
  *
@@ -5361,15 +5362,13 @@ static struct task_struct *detach_one_task(struct lb_env *env)
if (!can_migrate_task(p, env))
continue;
 
-   deactivate_task(env->src_rq, p, 0);
-   p->on_rq = TASK_ON_RQ_MIGRATING;
-   set_task_cpu(p, env->dst_cpu);
+   detach_task(p, env);
 
/*
 * Right now, this is only the second place where
-* lb_gained[env->idle] is updated (other is move_tasks)
+* lb_gained[env->idle] is updated (other is detach_tasks)
 * so we can safely collect stats here rather than
-* inside move_tasks().
+* inside detach_tasks().
 */
schedstat_inc(env->sd, lb_gained[env->idle]);
return p;
@@ -5377,35 +5376,22 @@ static struct task_struct *detach_one_task(struct lb_env *env)
return NULL;
 }
 
-/*
- * attach_one_task() -- attaches the task returned from detach_one_task() to
- * its new rq.
- */
-static void attach_one_task(struct rq *rq, struct task_struct *p)
-{
-	raw_spin_lock(&rq->lock);
-   BUG_ON(task_rq(p) != rq);
-   p->on_rq = TASK_ON_RQ_QUEUED;
-   activate_task(rq, p, 0);
-   check_preempt_curr(rq, p, 0);
-	raw_spin_unlock(&rq->lock);
-}
-
 static const unsigned int sched_nr_migrate_break = 32;
 
 /*
- * move_tasks tries to move up to imbalance weighted load from busiest to
- * this_rq, as part of a balancing operation within domain "sd".
- * Returns 1 if successful and 0 otherwise.
+ * detach_tasks() -- tries to detach up to imbalance weighted load from
+ * busiest_rq, as part of a balancing operation within domain "sd".
  *
- * Called with both runqueues locked.
+ * Returns number of detached tasks if successful and 0 otherwise.
  */
-static int move_tasks(struct 
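
The quoted diff breaks off above. For the attach side that the new
attach_tasks() comment in check_preempt_wakeup() refers to, a hedged sketch
consistent with the detach helpers shown here; attach_task() and the
se.group_node list linkage are assumptions of this sketch, not quoted from
the patch:

static void attach_task(struct rq *rq, struct task_struct *p)
{
        lockdep_assert_held(&rq->lock);

        BUG_ON(task_rq(p) != rq);
        p->on_rq = TASK_ON_RQ_QUEUED;
        activate_task(rq, p, 0);
        check_preempt_curr(rq, p, 0);
}

static void attach_tasks(struct lb_env *env)
{
        struct list_head *tasks = &env->tasks;
        struct task_struct *p;

        raw_spin_lock(&env->dst_rq->lock);

        while (!list_empty(tasks)) {
                p = list_first_entry(tasks, struct task_struct, se.group_node);
                list_del_init(&p->se.group_node);
                attach_task(env->dst_rq, p);
        }

        raw_spin_unlock(&env->dst_rq->lock);
}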
