From: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>

If an expedited grace period executes after the start of a normal
grace period, there is no point in the normal grace period doing any
more work, as the expedited grace period has supplied all the needed
quiescent states.  This commit therefore makes the normal grace-period
initialization process take a snapshot of the expedited grace-period
state, and then recheck the state just before each force-quiescent-state
scan.  If the recheck determines that a full expedited grace period
executed since the beginning of the normal grace period, the grace-period
kthread proceeds immediately to grace-period cleanup.

Because the expedited grace period does not awaken the grace-period
kthread, this change should provide only a minimal reduction in
grace-period latency; however, it does reduce the overhead of detecting
the end of the grace period.

Signed-off-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 40 ++++++++++++++++++++++++++++++++--------
 kernel/rcu/tree.h |  6 ++++++
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index bea9b9c80d91..5365f6332a60 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -104,6 +104,7 @@ struct rcu_state sname##_state = { \
        .orphan_nxttail = &sname##_state.orphan_nxtlist, \
        .orphan_donetail = &sname##_state.orphan_donelist, \
        .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+       .exp_rsp = &sname##_state, \
        .name = RCU_STATE_NAME(sname), \
        .abbr = sabbr, \
 }
@@ -1954,6 +1955,15 @@ static int rcu_gp_init(struct rcu_state *rsp)
                WRITE_ONCE(rsp->gp_activity, jiffies);
        }
 
+       /*
+        * Record associated expedited-grace-period snapshot.  This cannot
+        * be done before the root rcu_node structure has been initialized
+        * due to the fact that callbacks can still be registered for the
+        * current grace period until that initialization is complete.
+        */
+       rsp->gp_exp_snap = rcu_exp_gp_seq_snap(rsp->exp_rsp);
+       rsp->gp_exp_help = false;
+
        return 1;
 }
 
@@ -2035,8 +2045,14 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
                smp_mb__after_unlock_lock();
-               WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
-               WARN_ON_ONCE(rnp->qsmask);
+               if (!rsp->gp_exp_help) {
+                       WARN_ON_ONCE(rnp->qsmask);
+                       WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
+               } else {
+                       /* Clear out state from truncated grace period. */
+                       rnp->qsmask = 0;
+                       rnp->gp_tasks = NULL;
+               }
                WRITE_ONCE(rnp->completed, rsp->gpnum);
                rdp = this_cpu_ptr(rsp->rda);
                if (rnp == rdp->mynode)
@@ -2121,17 +2137,24 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                               TPS("fqswait"));
                        rsp->gp_state = RCU_GP_WAIT_FQS;
                        ret = wait_event_interruptible_timeout(rsp->gp_wq,
-                                       ((gf = READ_ONCE(rsp->gp_flags)) &
-                                        RCU_GP_FLAG_FQS) ||
-                                       (!READ_ONCE(rnp->qsmask) &&
-                                        !rcu_preempt_blocked_readers_cgp(rnp)),
-                                       j);
+                               ((gf = READ_ONCE(rsp->gp_flags)) &
+                                RCU_GP_FLAG_FQS) ||
+                               (!READ_ONCE(rnp->qsmask) &&
+                                !rcu_preempt_blocked_readers_cgp(rnp)) ||
+                               rcu_exp_gp_seq_done(rsp->exp_rsp,
+                                                   rsp->gp_exp_snap),
+                               j);
                        rsp->gp_state = RCU_GP_DONE_FQS;
                        /* Locking provides needed memory barriers. */
                        /* If grace period done, leave loop. */
                        if (!READ_ONCE(rnp->qsmask) &&
-                           !rcu_preempt_blocked_readers_cgp(rnp))
+                           !rcu_preempt_blocked_readers_cgp(rnp)) {
                                break;
+                       } else if (rcu_exp_gp_seq_done(rsp->exp_rsp,
+                                                      rsp->gp_exp_snap)) {
+                               rsp->gp_exp_help = true;
+                               break;
+                       }
                        /* If time for quiescent-state forcing, do it. */
                        if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
                            (gf & RCU_GP_FLAG_FQS)) {
@@ -4190,6 +4213,7 @@ void __init rcu_init(void)
        rcu_init_geometry();
        rcu_init_one(&rcu_bh_state, &rcu_bh_data);
        rcu_init_one(&rcu_sched_state, &rcu_sched_data);
+       rcu_bh_state.exp_rsp = &rcu_sched_state;
        if (dump_tree)
                rcu_dump_rcu_node_tree(&rcu_sched_state);
        __rcu_init_preempt();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e40b65d45495..eed6e84cb182 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -466,6 +466,10 @@ struct rcu_state {
        wait_queue_head_t gp_wq;                /* Where GP task waits. */
        short gp_flags;                         /* Commands for GP task. */
        short gp_state;                         /* GP kthread sleep state. */
+       bool gp_exp_help;                       /* Expedited GP helped the */
+                                               /*  just-completed normal GP. */
+       unsigned long gp_exp_snap;              /* Expedited snapshot for */
+                                               /*  FQS short-circuiting. */
 
        /* End of fields guarded by root rcu_node's lock. */
 
@@ -495,6 +499,8 @@ struct rcu_state {
        atomic_long_t expedited_normal;         /* # fallbacks to normal. */
        atomic_t expedited_need_qs;             /* # CPUs left to check in. */
        wait_queue_head_t expedited_wq;         /* Wait for check-ins. */
+       struct rcu_state *exp_rsp;              /* RCU flavor that expedites */
+                                               /*  for this flavor. */
 
        unsigned long jiffies_force_qs;         /* Time at which to invoke */
                                                /*  force_quiescent_state(). */
-- 
1.8.1.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to