There is some confusion as to whether cond_resched() or
cond_resched_rcu_qs() should be added to long in-kernel loops.
This commit therefore eliminates the need to choose by adding RCU
quiescent states to cond_resched().  This commit also simplifies the
code that used to back cond_resched_rcu_qs(), and that now backs
cond_resched(), in order to reduce its overhead.  This reduction is
necessary because the heavier-weight cond_resched_rcu_qs() mechanism
is now invoked everywhere that cond_resched() is invoked.
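
For illustration only, here is a minimal sketch of the loop pattern
this change targets (struct item, scan_items(), and process_item()
are hypothetical and not part of this patch):

	/* Hypothetical long-running in-kernel loop. */
	static void scan_items(struct item *items, unsigned long n)
	{
		unsigned long i;

		for (i = 0; i < n; i++) {
			process_item(&items[i]);
			/*
			 * Yields the CPU if needed; with this patch it
			 * also reports an RCU quiescent state, so a
			 * separate cond_resched_rcu_qs() call is no
			 * longer needed here.
			 */
			cond_resched();
		}
	}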

Part of that overhead reduction converts the jiffies_till_sched_qs
kernel parameter to read-only at runtime (its default also moves from
HZ/20 to HZ/10), which eliminates the need to bounds-check the value
on each use.
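
With the parameter now read-only (mode 0444), it can be set only on
the kernel command line.  As a usage note, assuming the usual
"rcutree." prefix for parameters defined in kernel/rcu/tree.c, and
with a purely illustrative value:

	rcutree.jiffies_till_sched_qs=200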

Reported-by: Michal Hocko <mho...@kernel.org>
Signed-off-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>
Cc: Peter Zijlstra <pet...@infradead.org>
---
 include/linux/sched.h |  3 ++-
 kernel/rcu/tree.c     | 25 +++++--------------------
 kernel/sched/core.c   |  1 +
 3 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a7df4e558c..59688ef3ea23 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1565,10 +1565,11 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  * cond_resched_lock() will drop the spinlock before scheduling,
  * cond_resched_softirq() will enable bhs before scheduling.
  */
+void rcu_all_qs(void);
 #ifndef CONFIG_PREEMPT
 extern int _cond_resched(void);
 #else
-static inline int _cond_resched(void) { return 0; }
+static inline int _cond_resched(void) { rcu_all_qs(); return 0; }
 #endif
 
 #define cond_resched() ({                      \
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0c44c7b42e6d..d3b1d926bb91 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -534,8 +534,8 @@ module_param(rcu_kick_kthreads, bool, 0644);
  * How long the grace period must be before we start recruiting
  * quiescent-state help from rcu_note_context_switch().
  */
-static ulong jiffies_till_sched_qs = HZ / 20;
-module_param(jiffies_till_sched_qs, ulong, 0644);
+static ulong jiffies_till_sched_qs = HZ / 10;
+module_param(jiffies_till_sched_qs, ulong, 0444);
 
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
                                  struct rcu_data *rdp);
@@ -1235,7 +1235,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
        unsigned long jtsq;
        bool *rnhqp;
        bool *ruqp;
-       unsigned long rjtsc;
        struct rcu_node *rnp;
 
        /*
@@ -1252,23 +1251,13 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
                return 1;
        }
 
-       /* Compute and saturate jiffies_till_sched_qs. */
-       jtsq = jiffies_till_sched_qs;
-       rjtsc = rcu_jiffies_till_stall_check();
-       if (jtsq > rjtsc / 2) {
-               WRITE_ONCE(jiffies_till_sched_qs, rjtsc);
-               jtsq = rjtsc / 2;
-       } else if (jtsq < 1) {
-               WRITE_ONCE(jiffies_till_sched_qs, 1);
-               jtsq = 1;
-       }
-
        /*
         * Has this CPU encountered a cond_resched_rcu_qs() since the
         * beginning of the grace period?  For this to be the case,
         * the CPU has to have noticed the current grace period.  This
         * might not be the case for nohz_full CPUs looping in the kernel.
         */
+       jtsq = jiffies_till_sched_qs;
        rnp = rdp->mynode;
        ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
        if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
@@ -1276,7 +1265,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
            READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
                trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
                return 1;
-       } else {
+       } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {
                /* Load rcu_qs_ctr before store to rcu_urgent_qs. */
                smp_store_release(ruqp, true);
        }
@@ -1304,10 +1293,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
         * updates are only once every few jiffies, the probability of
         * lossage (and thus of slight grace-period extension) is
         * quite low.
-        *
-        * Note that if the jiffies_till_sched_qs boot/sysfs parameter
-        * is set too high, we override with half of the RCU CPU stall
-        * warning delay.
         */
        rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
        if (!READ_ONCE(*rnhqp) &&
@@ -1316,7 +1301,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
                WRITE_ONCE(*rnhqp, true);
                /* Store rcu_need_heavy_qs before rcu_urgent_qs. */
                smp_store_release(ruqp, true);
-               rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
+               rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */
        }
 
        /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d17c5da523a0..904d3ab35c83 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4842,6 +4842,7 @@ int __sched _cond_resched(void)
                preempt_schedule_common();
                return 1;
        }
+       rcu_all_qs();
        return 0;
 }
 EXPORT_SYMBOL(_cond_resched);
-- 
2.5.2
