When an expedited grace period is set, both synchronize_sched and
synchronize_rcu_bh can be optimized to have a significantly lower latency.

Improve wait_rcu_gp handling to also account for an expedited grace period.
The downside is that wait_rcu_gp will no longer wait for all RCU variants
concurrently when an expedited grace period is set. However, given the
improved latency, this does not really matter.

Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
Cc: Josh Triplett <j...@joshtriplett.org>
Cc: Steven Rostedt <rost...@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoy...@efficios.com>
Cc: Lai Jiangshan <jiangshan...@gmail.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: KarimAllah Ahmed <karah...@amazon.de>
---
 kernel/rcu/update.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 68fa19a..44b8817 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -392,13 +392,27 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t 
*crcu_array,
                        might_sleep();
                        continue;
                }
-               init_rcu_head_on_stack(&rs_array[i].head);
-               init_completion(&rs_array[i].completion);
+
                for (j = 0; j < i; j++)
                        if (crcu_array[j] == crcu_array[i])
                                break;
-               if (j == i)
-                       (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
+               if (j != i)
+                       continue;
+
+               if ((crcu_array[i] == call_rcu_sched ||
+                    crcu_array[i] == call_rcu_bh)
+                   && rcu_gp_is_expedited()) {
+                       if (crcu_array[i] == call_rcu_sched)
+                               synchronize_sched_expedited();
+                       else
+                               synchronize_rcu_bh_expedited();
+
+                       continue;
+               }
+
+               init_rcu_head_on_stack(&rs_array[i].head);
+               init_completion(&rs_array[i].completion);
+               (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
        }
 
        /* Wait for all callbacks to be invoked. */
@@ -407,11 +421,19 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t 
*crcu_array,
                    (crcu_array[i] == call_rcu ||
                     crcu_array[i] == call_rcu_bh))
                        continue;
+
+               if ((crcu_array[i] == call_rcu_sched ||
+                    crcu_array[i] == call_rcu_bh)
+                   && rcu_gp_is_expedited())
+                       continue;
+
                for (j = 0; j < i; j++)
                        if (crcu_array[j] == crcu_array[i])
                                break;
-               if (j == i)
-                       wait_for_completion(&rs_array[i].completion);
+               if (j != i)
+                       continue;
+
+               wait_for_completion(&rs_array[i].completion);
                destroy_rcu_head_on_stack(&rs_array[i].head);
        }
 }
-- 
2.7.4

Reply via email to