Re: [tip: sched/core] sched: Replace rq::wake_list

2020-06-02 Thread Frederic Weisbecker
On Mon, Jun 01, 2020 at 09:52:18AM -0000, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the sched/core branch of tip:
> 
> Commit-ID:     a148866489fbe243c936fe43e4525d8dbfa0318f
> Gitweb:        https://git.kernel.org/tip/a148866489fbe243c936fe43e4525d8dbfa0318f
> Author:        Peter Zijlstra
> AuthorDate:    Tue, 26 May 2020 18:11:04 +02:00
> Committer:     Ingo Molnar
> CommitterDate: Thu, 28 May 2020 10:54:16 +02:00
> 
> sched: Replace rq::wake_list
> 
> The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
> got smp_call_function_single_async() subtly wrong. Even though it will
> return -EBUSY when trying to re-use a csd, that condition is not
> atomic and still requires external serialization.
> 
> The change in ttwu_queue_remote() got this wrong.
> 
> While on first reading ttwu_queue_remote() has an atomic test-and-set
> that appears to serialize the use, the matching 'release' is not in
> the right place to actually guarantee this serialization.
> 
> The actual race is vs the sched_ttwu_pending() call in the idle loop;
> that can run the wakeup-list without consuming the CSD.
> 
> Instead of trying to chain the lists, merge them.
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> Signed-off-by: Ingo Molnar 
> Link: https://lore.kernel.org/r/20200526161908.129371...@infradead.org

Looks good, thanks :)


[tip: sched/core] sched: Replace rq::wake_list

2020-06-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     a148866489fbe243c936fe43e4525d8dbfa0318f
Gitweb:        https://git.kernel.org/tip/a148866489fbe243c936fe43e4525d8dbfa0318f
Author:        Peter Zijlstra
AuthorDate:    Tue, 26 May 2020 18:11:04 +02:00
Committer:     Ingo Molnar
CommitterDate: Thu, 28 May 2020 10:54:16 +02:00

sched: Replace rq::wake_list

The recent commit: 90b5363acd47 ("sched: Clean up scheduler_ipi()")
got smp_call_function_single_async() subtly wrong. Even though it will
return -EBUSY when trying to re-use a csd, that condition is not
atomic and still requires external serialization.

The change in ttwu_queue_remote() got this wrong.

While on first reading ttwu_queue_remote() has an atomic test-and-set
that appears to serialize the use, the matching 'release' is not in
the right place to actually guarantee this serialization.

The actual race is vs the sched_ttwu_pending() call in the idle loop;
that can run the wakeup-list without consuming the CSD.

Instead of trying to chain the lists, merge them.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Link: https://lore.kernel.org/r/20200526161908.129371...@infradead.org
---
 include/linux/sched.h |  1 +-
 include/linux/smp.h   |  1 +-
 kernel/sched/core.c   | 25 ++
 kernel/sched/idle.c   |  1 +-
 kernel/sched/sched.h  |  8 +---
 kernel/smp.c          | 47 +++---
 6 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ebc6870..e0f5f41 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -654,6 +654,7 @@ struct task_struct {
 
 #ifdef CONFIG_SMP
struct llist_node   wake_entry;
+   unsigned intwake_entry_type;
int on_cpu;
 #ifdef CONFIG_THREAD_INFO_IN_TASK
/* Current CPU: */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 45ad6e3..84f90e2 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -25,6 +25,7 @@ enum {
CSD_TYPE_ASYNC  = 0x00,
CSD_TYPE_SYNC   = 0x10,
CSD_TYPE_IRQ_WORK   = 0x20,
+   CSD_TYPE_TTWU   = 0x30,
CSD_FLAG_TYPE_MASK  = 0xF0,
 };
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b71ed5e..b3c64c6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1538,7 +1538,7 @@ static int migration_cpu_stop(void *data)
 * __migrate_task() such that we will not miss enforcing cpus_ptr
 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
 */
-   sched_ttwu_pending();
+   flush_smp_call_function_from_idle();
 
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
@@ -2272,14 +2272,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
 }
 
 #ifdef CONFIG_SMP
-void sched_ttwu_pending(void)
+void sched_ttwu_pending(void *arg)
 {
+   struct llist_node *llist = arg;
struct rq *rq = this_rq();
-   struct llist_node *llist;
struct task_struct *p, *t;
struct rq_flags rf;
 
-   llist = llist_del_all(&rq->wake_list);
if (!llist)
return;
 
@@ -2299,11 +2298,6 @@ void sched_ttwu_pending(void)
rq_unlock_irqrestore(rq, &rf);
 }
 
-static void wake_csd_func(void *info)
-{
-   sched_ttwu_pending();
-}
-
 void send_call_function_single_ipi(int cpu)
 {
struct rq *rq = cpu_rq(cpu);
@@ -2327,12 +2321,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
 
WRITE_ONCE(rq->ttwu_pending, 1);
-   if (llist_add(&p->wake_entry, &rq->wake_list)) {
-   if (!set_nr_if_polling(rq->idle))
-   smp_call_function_single_async(cpu, &rq->wake_csd);
-   else
-   trace_sched_wake_idle_without_ipi(cpu);
-   }
+   __smp_call_single_queue(cpu, &p->wake_entry);
 }
 
 void wake_up_if_idle(int cpu)
@@ -2772,6 +2761,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->capture_control = NULL;
 #endif
init_numa_balancing(clone_flags, p);
+#ifdef CONFIG_SMP
+   p->wake_entry_type = CSD_TYPE_TTWU;
+#endif
 }
 
 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -6564,7 +6556,6 @@ int sched_cpu_dying(unsigned int cpu)
struct rq_flags rf;
 
/* Handle pending wakeups and then migrate everything off */
-   sched_ttwu_pending();
sched_tick_stop(cpu);
 
rq_lock_irqsave(rq, &rf);
@@ -6763,8 +6754,6 @@ void __init sched_init(void)
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 
-   rq_csd_init(rq, &rq->wake_csd, wake_csd_func);
-
INIT_LIST_HEAD(&rq->cfs_tasks);
 
rq_attac