On Tue, Nov 03, 2015 at 02:31:20PM +0100, Peter Zijlstra wrote:
> > @@ -5136,6 +5148,16 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
> >  	struct task_struct *p;
> >  	int new_tasks;
> >  
> > +#ifdef CONFIG_CFS_IDLE_INJECT
> > +	if (cfs_rq->force_throttled &&
> > +	    !idle_cpu(cpu_of(rq)) &&
> > +	    !unlikely(local_softirq_pending())) {
> > +		/* forced idle, pick no task */
> > +		trace_sched_cfs_idle_inject(cpu_of(rq), 1);
> > +		update_curr(cfs_rq);
> > +		return NULL;
> > +	}
> > +#endif
> >  again:
> >  #ifdef CONFIG_FAIR_GROUP_SCHED
> >  	if (!cfs_rq->nr_running)
> 
> So this is horrible...
So this isn't ideal either (I rather liked the previous approach of a
random task assuming idle, but tglx hated that). This should at least
not touch extra cachelines in the hot paths, although it does add a few
extra instructions :/

Very limited testing didn't show anything horrible.

Your throttle would:

	raw_spin_lock_irqsave(&rq->lock, flags);
	rq->cfs.forced_idle = true;
	resched = rq->cfs.runnable;
	rq->cfs.runnable = false;
	raw_spin_unlock_irqrestore(&rq->lock, flags);
	if (resched)
		resched_cpu(cpu_of(rq));

And your unthrottle:

	raw_spin_lock_irqsave(&rq->lock, flags);
	rq->cfs.forced_idle = false;
	resched = rq->cfs.runnable = !!rq->cfs.nr_running;
	raw_spin_unlock_irqrestore(&rq->lock, flags);
	if (resched)
		resched_cpu(cpu_of(rq));

---
 kernel/sched/fair.c  | 13 +++++++++----
 kernel/sched/sched.h |  1 +
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 824aa9f..1f0c809 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2341,7 +2341,8 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		list_add(&se->group_node, &rq->cfs_tasks);
 	}
 #endif
-	cfs_rq->nr_running++;
+	if (!cfs_rq->nr_running++ && !cfs_rq->forced_idle)
+		cfs_rq->runnable = true;
 }
 
 static void
@@ -2354,7 +2355,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		account_numa_dequeue(rq_of(cfs_rq), task_of(se));
 		list_del_init(&se->group_node);
 	}
-	cfs_rq->nr_running--;
+	if (!--cfs_rq->nr_running && !cfs_rq->forced_idle)
+		cfs_rq->runnable = false;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -5204,7 +5206,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
 
 again:
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	if (!cfs_rq->nr_running)
+	if (!cfs_rq->runnable)
 		goto idle;
 
 	if (prev->sched_class != &fair_sched_class)
@@ -5283,7 +5285,7 @@ simple:
 	cfs_rq = &rq->cfs;
 #endif
 
-	if (!cfs_rq->nr_running)
+	if (!cfs_rq->runnable)
 		goto idle;
 
 	put_prev_task(rq, prev);
@@ -5302,6 +5304,9 @@ simple:
 	return p;
 
 idle:
+	if (cfs_rq->forced_idle)
+		return NULL;
+
 	/*
 	 * This is OK, because current is on_cpu, which avoids it being picked
 	 * for load-balance and preemption/IRQs are still disabled avoiding
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index efd3bfc..33d355d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -347,6 +347,7 @@ struct cfs_bandwidth {
 struct cfs_rq {
 	struct load_weight load;
 	unsigned int nr_running, h_nr_running;
+	unsigned int runnable, forced_idle;
 
 	u64 exec_clock;
 	u64 min_vruntime;
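
For the driver side I'd wrap both sequences above in a single helper --
completely untested sketch, cfs_force_idle() is a made-up name, and it
assumes the code sits in kernel/sched/ where struct rq, cpu_of() and
resched_cpu() are visible:

	/* Hypothetical helper, not in any tree; name made up. */
	static void cfs_force_idle(struct rq *rq, bool idle)
	{
		unsigned long flags;
		bool resched;

		raw_spin_lock_irqsave(&rq->lock, flags);
		rq->cfs.forced_idle = idle;
		if (idle) {
			/* hide all tasks; pick_next_task_fair() returns NULL */
			resched = rq->cfs.runnable;
			rq->cfs.runnable = false;
		} else {
			/* expose whatever got enqueued while forced idle */
			resched = rq->cfs.runnable = !!rq->cfs.nr_running;
		}
		raw_spin_unlock_irqrestore(&rq->lock, flags);

		if (resched)
			resched_cpu(cpu_of(rq));
	}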
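
The injection itself is then just a per-cpu timer flipping that helper
at some duty cycle -- again an untested sketch with made-up numbers
(5ms forced idle out of every 20ms), assuming pinned per-cpu hrtimers
so the callback runs on the cpu it throttles:

	static DEFINE_PER_CPU(struct hrtimer, inject_timer);
	static DEFINE_PER_CPU(bool, injecting);

	static enum hrtimer_restart inject_timer_fn(struct hrtimer *timer)
	{
		int cpu = smp_processor_id();
		bool idle = !__this_cpu_read(injecting);
		/* 5ms forced idle, 15ms normal -> 25% injection */
		u64 delay = idle ? 5 * NSEC_PER_MSEC : 15 * NSEC_PER_MSEC;

		cfs_force_idle(cpu_rq(cpu), idle);
		__this_cpu_write(injecting, idle);

		hrtimer_forward_now(timer, ns_to_ktime(delay));
		return HRTIMER_RESTART;
	}

The point being that the hot path only ever reads cfs_rq->runnable; all
the forced_idle games stay in the (rare) throttle/unthrottle path under
rq->lock.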