On Wed, Apr 15, 2026 at 11:21:05PM +0000, Aniket Gattani wrote:

> @@ -358,14 +371,19 @@ static int membarrier_private_expedited(int flags, int cpu_id)
>       if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
>               return -ENOMEM;
>  
> -     SERIALIZE_IPI();
> +     if ((unsigned int)cpu_id >= nr_cpu_ids || !cpu_possible(cpu_id))
> +             return 0;

Right, so this is broken, sorry about that. This makes the whole
cpu_id == -1 branch below dead code, and that cannot be right.

> +     SERIALIZE_IPI_CPU(cpu_id);
> +
>       cpus_read_lock();
>  
>       if (cpu_id >= 0) {
>               struct task_struct *p;
>  
> -             if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
> +             if (!cpu_online(cpu_id))
>                       goto out;
> +
>               rcu_read_lock();
>               p = rcu_dereference(cpu_rq(cpu_id)->curr);
>               if (!p || p->mm != mm) {


How about something like so?

---
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 623445603725..5b056fff7d1d 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -165,7 +165,20 @@
        | MEMBARRIER_CMD_GET_REGISTRATIONS)
 
 static DEFINE_MUTEX(membarrier_ipi_mutex);
+static DEFINE_PER_CPU(struct mutex, membarrier_cpu_mutexes);
+
 #define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex)
+#define SERIALIZE_IPI_CPU(cpu_id) guard(mutex)(&per_cpu(membarrier_cpu_mutexes, cpu_id))
+
+static int __init membarrier_init(void)
+{
+       int i;
+
+       for_each_possible_cpu(i)
+               mutex_init(&per_cpu(membarrier_cpu_mutexes, i));
+       return 0;
+}
+core_initcall(membarrier_init);
 
 static void ipi_mb(void *info)
 {
@@ -313,9 +326,10 @@ static int membarrier_global_expedited(void)
        return 0;
 }
 
+DEFINE_LOCK_GUARD_0(mb, smp_mb(), smp_mb())
+
 static int membarrier_private_expedited(int flags, int cpu_id)
 {
-       cpumask_var_t tmpmask;
        struct mm_struct *mm = current->mm;
        smp_call_func_t ipi_func = ipi_mb;
 
@@ -352,30 +366,46 @@ static int membarrier_private_expedited(int flags, int cpu_id)
         * On RISC-V, this barrier pairing is also needed for the
         * SYNC_CORE command when switching between processes, cf.
         * the inline comments in membarrier_arch_switch_mm().
+        *
+        * Memory barrier on the caller thread _after_ we finished
+        * waiting for the last IPI. Matches memory barriers before
+        * rq->curr modification in scheduler.
         */
-       smp_mb();       /* system call entry is not a mb. */
-
-       if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
-               return -ENOMEM;
-
-       SERIALIZE_IPI();
-       cpus_read_lock();
+       guard(mb)();
+       guard(cpus_read_lock)();
 
        if (cpu_id >= 0) {
                struct task_struct *p;
 
-               if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
-                       goto out;
+               if (cpu_id >= nr_cpu_ids)
+                       return 0; // -EINVAL ?
+
+               if (!cpu_online(cpu_id))
+                       return 0;
+
+               SERIALIZE_IPI_CPU(cpu_id);
+
                rcu_read_lock();
                p = rcu_dereference(cpu_rq(cpu_id)->curr);
                if (!p || p->mm != mm) {
                        rcu_read_unlock();
-                       goto out;
+                       return 0;
                }
                rcu_read_unlock();
+               /*
+                * smp_call_function_single() will call ipi_func() if cpu_id
+                * is the calling CPU.
+                */
+               smp_call_function_single(cpu_id, ipi_func, NULL, 1);
        } else {
+               cpumask_var_t __free(free_cpumask_var) tmpmask = CPUMASK_VAR_NULL;
                int cpu;
 
+               SERIALIZE_IPI();
+
+               if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+                       return -ENOMEM;
+
                rcu_read_lock();
                for_each_online_cpu(cpu) {
                        struct task_struct *p;
@@ -385,15 +415,6 @@ static int membarrier_private_expedited(int flags, int cpu_id)
                                __cpumask_set_cpu(cpu, tmpmask);
                }
                rcu_read_unlock();
-       }
-
-       if (cpu_id >= 0) {
-               /*
-                * smp_call_function_single() will call ipi_func() if cpu_id
-                * is the calling CPU.
-                */
-               smp_call_function_single(cpu_id, ipi_func, NULL, 1);
-       } else {
                /*
                 * For regular membarrier, we can save a few cycles by
                 * skipping the current cpu -- we're about to do smp_mb()
@@ -420,18 +441,6 @@ static int membarrier_private_expedited(int flags, int cpu_id)
                }
        }
 
-out:
-       if (cpu_id < 0)
-               free_cpumask_var(tmpmask);
-       cpus_read_unlock();
-
-       /*
-        * Memory barrier on the caller thread _after_ we finished
-        * waiting for the last IPI. Matches memory barriers before
-        * rq->curr modification in scheduler.
-        */
-       smp_mb();       /* exit from system call is not a mb */
-
        return 0;
 }
 

Reply via email to