Instead of having the nohz subsystem asynchronously check the scheduler's
tick dependency, migrate the scheduler to the new tick dependency mask.

The easiest is to recycle the current asynchronous tick dependency check
which verifies the class of the current task and its requirements for
periodic preemption checks.

We need to evaluate this tick dependency in three places:

1) Task enqueue: One or more tasks have been enqueued, we must check
   if those are competing with the current task.

2) Task dequeue: A possibly competing task has been dequeued, clear the
   tick dependency if needed.

3) schedule(): we might be switching to a task of another scheduler
   class. Each class has its own preemption rules, so we must re-evaluate
   the tick dependency.

This doesn't change much compared to the previous layout, except that
3) has to be done with the rq locked to prevent the mask update from
racing with a remote enqueue.

We could do away with 3) by checking the highest prio tasks of the
runqueue instead of its current task.

Cc: Christoph Lameter <c...@linux.com>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Preeti U Murthy <pre...@linux.vnet.ibm.com>
Cc: Rik van Riel <r...@redhat.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Viresh Kumar <viresh.ku...@linaro.org>
Signed-off-by: Frederic Weisbecker <fweis...@gmail.com>
---
 include/linux/sched.h    |  3 ---
 kernel/sched/core.c      | 12 ++++++-----
 kernel/sched/sched.h     | 56 +++++++++++++++++++++++++++++++++++-------------
 kernel/time/tick-sched.c |  5 -----
 4 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ae21f15..88c99a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2296,10 +2296,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
 extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d34035..6c3db36 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -714,21 +714,22 @@ static inline bool got_nohz_idle_kick(void)
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
 {
+       struct task_struct *curr = rq->curr;
        /*
         * FIFO realtime policy runs the highest priority task. Other runnable
         * tasks are of a lower priority. The scheduler tick does nothing.
         */
-       if (current->policy == SCHED_FIFO)
+       if (curr->policy == SCHED_FIFO)
                return true;
 
        /*
         * Round-robin realtime tasks time slice with other tasks at the same
         * realtime priority. Is this task the only one at this priority?
         */
-       if (current->policy == SCHED_RR) {
-               struct sched_rt_entity *rt_se = &current->rt;
+       if (curr->policy == SCHED_RR) {
+               struct sched_rt_entity *rt_se = &curr->rt;
 
                return rt_se->run_list.prev == rt_se->run_list.next;
        }
@@ -738,7 +739,7 @@ bool sched_can_stop_tick(void)
         * nr_running update is assumed to be visible
         * after IPI is sent from wakers.
         */
-       if (this_rq()->nr_running > 1)
+       if (rq->nr_running > 1)
                return false;
 
        return true;
@@ -2489,6 +2490,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                put_task_struct(prev);
        }
 
+       sched_update_tick_dependency(rq);
        tick_nohz_task_switch();
        return rq;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 84d4879..5037acf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1321,6 +1321,38 @@ unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_task_runnable_average(struct task_struct *p);
 
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+
+/*
+ * Tick is needed if more than one task runs on a CPU.
+ * Send the target an IPI to kick it out of nohz mode.
+ *
+ * We assume that IPI implies full memory barrier and the
+ * new value of rq->nr_running is visible on reception
+ * from the target.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+       int cpu;
+
+       if (!tick_nohz_full_enabled())
+               return;
+
+       cpu = cpu_of(rq);
+
+       if (!tick_nohz_full_cpu(rq->cpu))
+               return;
+
+       if (sched_can_stop_tick(rq))
+               tick_nohz_clear_tick_dependency_cpu(TICK_SCHED_BIT, cpu);
+       else
+               tick_nohz_set_tick_dependency_cpu(TICK_SCHED_BIT, cpu);
+}
+#else
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
 static inline void add_nr_running(struct rq *rq, unsigned count)
 {
        unsigned prev_nr = rq->nr_running;
@@ -1332,26 +1364,20 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
                if (!rq->rd->overload)
                        rq->rd->overload = true;
 #endif
-
-#ifdef CONFIG_NO_HZ_FULL
-               if (tick_nohz_full_cpu(rq->cpu)) {
-                       /*
-                        * Tick is needed if more than one task runs on a CPU.
-                        * Send the target an IPI to kick it out of nohz mode.
-                        *
-                        * We assume that IPI implies full memory barrier and the
-                        * new value of rq->nr_running is visible on reception
-                        * from the target.
-                        */
-                       tick_nohz_full_kick_cpu(rq->cpu);
-               }
-#endif
+               /* Check if new task(s) need periodic preemption check */
+               sched_update_tick_dependency(rq);
        }
 }
 
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
-       rq->nr_running -= count;
+       unsigned prev_nr = rq->nr_running;
+
+       rq->nr_running = prev_nr - count;
+       if (prev_nr > 1) {
+               /* Check if we still need preemption */
+               sched_update_tick_dependency(rq);
+       }
 }
 
 static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fbe4736..e6447bd 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -192,11 +192,6 @@ static bool can_stop_full_tick(struct tick_sched *ts)
                return false;
        }
 
-       if (!sched_can_stop_tick()) {
-               trace_tick_stop(0, "more than 1 task in runqueue\n");
-               return false;
-       }
-
        if (!posix_cpu_timers_can_stop_tick(current)) {
                trace_tick_stop(0, "posix timers running\n");
                return false;
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to