Threads targeting the same VM but which belong to different thread
groups is a tricky case. It has a few consequences:

It turns out that we cannot rely on get_nr_threads(p) to count the
number of threads using a VM. We can use
(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)
instead to skip the synchronize_sched() for cases where the VM only has
a single user, and that user only has a single thread.

It also turns out that we cannot use for_each_thread() to set
thread flags in all threads using a VM, as it only iterates on the
thread group.

Therefore, test the membarrier state variable directly rather than
relying on thread flags. This means
membarrier_register_private_expedited() needs to set the
MEMBARRIER_STATE_SWITCH_MM flag, issue synchronize_sched(), and only
then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY which allows private
expedited membarrier commands to succeed. membarrier_arch_switch_mm()
now tests for the MEMBARRIER_STATE_SWITCH_MM flag.

Reported-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoy...@efficios.com>
CC: Paul E. McKenney <paul...@linux.vnet.ibm.com>
CC: Boqun Feng <boqun.f...@gmail.com>
CC: Andrew Hunter <a...@google.com>
CC: Maged Michael <maged.mich...@gmail.com>
CC: gro...@google.com
CC: Avi Kivity <a...@scylladb.com>
CC: Benjamin Herrenschmidt <b...@kernel.crashing.org>
CC: Paul Mackerras <pau...@samba.org>
CC: Michael Ellerman <m...@ellerman.id.au>
CC: Dave Watson <davejwat...@fb.com>
CC: Alan Stern <st...@rowland.harvard.edu>
CC: Will Deacon <will.dea...@arm.com>
CC: Andy Lutomirski <l...@kernel.org>
CC: Ingo Molnar <mi...@redhat.com>
CC: Alexander Viro <v...@zeniv.linux.org.uk>
CC: Nicholas Piggin <npig...@gmail.com>
CC: linuxppc-...@lists.ozlabs.org
CC: linux-a...@vger.kernel.org
---
 arch/powerpc/include/asm/membarrier.h | 21 ++-------------------
 arch/powerpc/kernel/membarrier.c      | 17 ++++-------------
 include/linux/mm_types.h              |  2 +-
 include/linux/sched/mm.h              | 28 ++++++----------------------
 kernel/fork.c                         |  2 --
 kernel/sched/membarrier.c             | 16 +++++++++++++---
 6 files changed, 26 insertions(+), 60 deletions(-)

diff --git a/arch/powerpc/include/asm/membarrier.h 
b/arch/powerpc/include/asm/membarrier.h
index 61152a7a3cf9..0951646253d9 100644
--- a/arch/powerpc/include/asm/membarrier.h
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -11,8 +11,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct 
*prev,
         * when switching from userspace to kernel is not needed after
         * store to rq->curr.
         */
-       if (likely(!test_ti_thread_flag(task_thread_info(tsk),
-                       TIF_MEMBARRIER_PRIVATE_EXPEDITED) || !prev))
+       if (likely(!(atomic_read(&next->membarrier_state)
+                       & MEMBARRIER_STATE_SWITCH_MM) || !prev))
                return;
 
        /*
@@ -21,23 +21,6 @@ static inline void membarrier_arch_switch_mm(struct 
mm_struct *prev,
         */
        smp_mb();
 }
-static inline void membarrier_arch_fork(struct task_struct *t,
-               unsigned long clone_flags)
-{
-       /*
-        * Coherence of TIF_MEMBARRIER_PRIVATE_EXPEDITED against thread
-        * fork is protected by siglock. membarrier_arch_fork is called
-        * with siglock held.
-        */
-       if (test_thread_flag(TIF_MEMBARRIER_PRIVATE_EXPEDITED))
-               set_ti_thread_flag(task_thread_info(t),
-                               TIF_MEMBARRIER_PRIVATE_EXPEDITED);
-}
-static inline void membarrier_arch_execve(struct task_struct *t)
-{
-       clear_ti_thread_flag(task_thread_info(t),
-                       TIF_MEMBARRIER_PRIVATE_EXPEDITED);
-}
 void membarrier_arch_register_private_expedited(struct task_struct *t);
 
 #endif /* _ASM_POWERPC_MEMBARRIER_H */
diff --git a/arch/powerpc/kernel/membarrier.c b/arch/powerpc/kernel/membarrier.c
index b0d79a5f5981..4795ad59b833 100644
--- a/arch/powerpc/kernel/membarrier.c
+++ b/arch/powerpc/kernel/membarrier.c
@@ -19,24 +19,15 @@
 #include <linux/thread_info.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/atomic.h>
 
 void membarrier_arch_register_private_expedited(struct task_struct *p)
 {
-       struct task_struct *t;
+       struct mm_struct *mm = p->mm;
 
-       if (get_nr_threads(p) == 1) {
-               set_thread_flag(TIF_MEMBARRIER_PRIVATE_EXPEDITED);
+       atomic_or(MEMBARRIER_STATE_SWITCH_MM, &mm->membarrier_state);
+       if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)
                return;
-       }
-       /*
-        * Coherence of TIF_MEMBARRIER_PRIVATE_EXPEDITED against thread
-        * fork is protected by siglock.
-        */
-       spin_lock(&p->sighand->siglock);
-       for_each_thread(p, t)
-               set_ti_thread_flag(task_thread_info(t),
-                               TIF_MEMBARRIER_PRIVATE_EXPEDITED);
-       spin_unlock(&p->sighand->siglock);
        /*
         * Ensure all future scheduler executions will observe the new
         * thread flag state for this process.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5e0fe8ce053b..1861ea8dba77 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -446,7 +446,7 @@ struct mm_struct {
 
        struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_MEMBARRIER
-       int membarrier_private_expedited;
+       atomic_t membarrier_state;
 #endif
 #ifdef CONFIG_AIO
        spinlock_t                      ioctx_lock;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index b2767ecb21a8..2fb6089efcd7 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -212,35 +212,23 @@ static inline void memalloc_noreclaim_restore(unsigned 
int flags)
 
 #ifdef CONFIG_MEMBARRIER
 
+enum {
+       MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY        = (1U << 0),
+       MEMBARRIER_STATE_SWITCH_MM                      = (1U << 1),
+};
+
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_HOOKS
 #include <asm/membarrier.h>
 #else
-static inline void membarrier_arch_fork(struct task_struct *t,
-               unsigned long clone_flags)
-{
-}
-static inline void membarrier_arch_execve(struct task_struct *t)
-{
-}
 static inline void membarrier_arch_register_private_expedited(
                struct task_struct *p)
 {
 }
 #endif
 
-static inline void membarrier_fork(struct task_struct *t,
-               unsigned long clone_flags)
-{
-       /*
-        * Prior copy_mm() copies the membarrier_private_expedited field
-        * from current->mm to t->mm.
-        */
-       membarrier_arch_fork(t, clone_flags);
-}
 static inline void membarrier_execve(struct task_struct *t)
 {
-       t->mm->membarrier_private_expedited = 0;
-       membarrier_arch_execve(t);
+       atomic_set(&t->mm->membarrier_state, 0);
 }
 #else
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_HOOKS
@@ -249,10 +237,6 @@ static inline void membarrier_arch_switch_mm(struct 
mm_struct *prev,
 {
 }
 #endif
-static inline void membarrier_fork(struct task_struct *t,
-               unsigned long clone_flags)
-{
-}
 static inline void membarrier_execve(struct task_struct *t)
 {
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index bd4a93915e08..10646182440f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1840,8 +1840,6 @@ static __latent_entropy struct task_struct *copy_process(
         */
        copy_seccomp(p);
 
-       membarrier_fork(p, clone_flags);
-
        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index f06949a279ca..23bd256f8458 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
 #include <linux/membarrier.h>
 #include <linux/tick.h>
 #include <linux/cpumask.h>
+#include <linux/atomic.h>
 
 #include "sched.h"     /* for cpu_rq(). */
 
@@ -40,7 +41,8 @@ static int membarrier_private_expedited(void)
        bool fallback = false;
        cpumask_var_t tmpmask;
 
-       if (!READ_ONCE(current->mm->membarrier_private_expedited))
+       if (!(atomic_read(&current->mm->membarrier_state)
+                       & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
                return -EPERM;
 
        if (num_online_cpus() == 1)
@@ -104,11 +106,19 @@ static int membarrier_private_expedited(void)
 static void membarrier_register_private_expedited(void)
 {
        struct task_struct *p = current;
+       struct mm_struct *mm = p->mm;
 
-       if (READ_ONCE(p->mm->membarrier_private_expedited))
+       /*
+        * We need to consider threads belonging to different thread
+        * groups, which use the same mm. (CLONE_VM but not
+        * CLONE_THREAD).
+        */
+       if (atomic_read(&mm->membarrier_state)
+                       & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
                return;
        membarrier_arch_register_private_expedited(p);
-       WRITE_ONCE(p->mm->membarrier_private_expedited, 1);
+       atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+                       &mm->membarrier_state);
 }
 
 /**
-- 
2.11.0

Reply via email to