A full barrier is issued from nocb_gp_wait() upon callbacks advancing,
so as to order grace period completion against callbacks execution.

However, these two events are already ordered by the
smp_mb__after_unlock_lock() barrier within the call to
raw_spin_lock_rcu_node() that is required for callbacks advancing to
happen.

The following litmus test shows the kind of guarantee that this barrier
provides:

        C smp_mb__after_unlock_lock

        {}

        // rcu_gp_cleanup()
        P0(spinlock_t *rnp_lock, int *gpnum)
        {
                // Grace period cleanup increases the gp sequence number
                spin_lock(rnp_lock);
                WRITE_ONCE(*gpnum, 1);
                spin_unlock(rnp_lock);
        }

        // nocb_gp_wait()
        P1(spinlock_t *rnp_lock, spinlock_t *nocb_lock, int *gpnum, int *cb_ready)
        {
                int r1;

                // Call rcu_advance_cbs() from nocb_gp_wait()
                spin_lock(nocb_lock);
                spin_lock(rnp_lock);
                smp_mb__after_unlock_lock();
                r1 = READ_ONCE(*gpnum);
                WRITE_ONCE(*cb_ready, 1);
                spin_unlock(rnp_lock);
                spin_unlock(nocb_lock);
        }

        // nocb_cb_wait()
        P2(spinlock_t *nocb_lock, int *cb_ready, int *cb_executed)
        {
                int r2;

                // rcu_do_batch() -> rcu_segcblist_extract_done_cbs()
                spin_lock(nocb_lock);
                r2 = READ_ONCE(*cb_ready);
                spin_unlock(nocb_lock);

                // Actual callback execution
                WRITE_ONCE(*cb_executed, 1);
        }

        P3(int *cb_executed, int *gpnum)
        {
                int r3;

                WRITE_ONCE(*cb_executed, 2);
                smp_mb();
                r3 = READ_ONCE(*gpnum);
        }

        exists (1:r1=1 /\ 2:r2=1 /\ cb_executed=2 /\ 3:r3=0) (* Bad outcome. *)

Here the bad outcome only occurs if the smp_mb__after_unlock_lock() is
removed. This barrier orders the grace period completion against
callbacks advancing and even against later callbacks invocation, thanks
to the opportunistic propagation via the ->nocb_lock to nocb_cb_wait().

Therefore the smp_mb() placed after callbacks advancing can be safely
removed.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
 kernel/rcu/tree.c      | 6 ++++++
 kernel/rcu/tree_nocb.h | 1 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3ac3c846105f..c557302fc4f5 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2113,6 +2113,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
         * Extract the list of ready callbacks, disabling IRQs to prevent
         * races with call_rcu() from interrupt handlers.  Leave the
         * callback counts, as rcu_barrier() needs to be conservative.
+        *
+        * Callbacks execution is fully ordered against preceding grace period
+        * completion (materialized by rnp->gp_seq update) thanks to the
+        * smp_mb__after_unlock_lock() upon node locking required for callbacks
+        * advancing. In NOCB mode this ordering is then further relayed through
+        * the nocb locking that protects both callbacks advancing and extraction.
         */
        rcu_nocb_lock_irqsave(rdp, flags);
        WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index eb27878d46f1..d82f96a66600 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -779,7 +779,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
                if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
                        needwake = rdp->nocb_cb_sleep;
                        WRITE_ONCE(rdp->nocb_cb_sleep, false);
-                       smp_mb(); /* CB invocation -after- GP end. */
                } else {
                        needwake = false;
                }
-- 
2.42.1

Reply via email to