From: "Paul E. McKenney" <paul...@kernel.org>

During CSD-lock stalls, the additional information output by RCU CPU
stall warnings is usually redundant, flooding the console for no good
reason.  However, this has been the way things work for a few years.
This commit therefore adds an rcutree.csd_lock_suppress_rcu_stall kernel
boot parameter that causes RCU CPU stall warnings to be abbreviated to
a single line when there is at least one CPU that has been stuck waiting
for a CSD lock for more than five seconds.

To make this abbreviated message happen with decent probability:

tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 8 \
        --configs "2*TREE01" --kconfig "CONFIG_CSD_LOCK_WAIT_DEBUG=y" \
        --bootargs "csdlock_debug=1 rcutorture.stall_cpu=200 \
        rcutorture.stall_cpu_holdoff=120 rcutorture.stall_cpu_irqsoff=1 \
        rcutree.csd_lock_suppress_rcu_stall=1 \
        rcupdate.rcu_exp_cpu_stall_timeout=5000" --trust-make

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Paul E. McKenney <paul...@kernel.org>
Signed-off-by: Neeraj Upadhyay <neeraj.upadh...@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 4 ++++
 kernel/rcu/tree_stall.h                         | 8 +++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1384c7b59c9..d56356c13184 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4937,6 +4937,10 @@
                        Set maximum number of finished RCU callbacks to
                        process in one batch.
 
+       rcutree.csd_lock_suppress_rcu_stall=    [KNL]
+                       Do only a one-line RCU CPU stall warning when
+                       there is an ongoing too-long CSD-lock wait.
+
        rcutree.do_rcu_barrier= [KNL]
                        Request a call to rcu_barrier().  This is
                        throttled so that userspace tests can safely
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 4b0e9d7c4c68..b497d4c6dabd 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -9,6 +9,7 @@
 
 #include <linux/kvm_para.h>
 #include <linux/rcu_notifier.h>
+#include <linux/smp.h>
 
 //////////////////////////////////////////////////////////////////////////////
 //
@@ -719,6 +720,9 @@ static void print_cpu_stall(unsigned long gps)
        set_preempt_need_resched();
 }
 
+static bool csd_lock_suppress_rcu_stall;
+module_param(csd_lock_suppress_rcu_stall, bool, 0644);
+
 static void check_cpu_stall(struct rcu_data *rdp)
 {
        bool self_detected;
@@ -791,7 +795,9 @@ static void check_cpu_stall(struct rcu_data *rdp)
                        return;
 
                 rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
-               if (self_detected) {
+               if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
+                       pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
+               } else if (self_detected) {
                        /* We haven't checked in, so go dump stack. */
                        print_cpu_stall(gps);
                } else {
-- 
2.40.1


Reply via email to