{en,de}queue_task_fair are updated to track which cpus will have changed
utilization values as function of task queueing. The affected cpus are
passed on to arch_eval_cpu_freq for further machine-specific processing
based on a selectable policy.

arch_scale_cpu_freq is called from run_rebalance_domains as a way to
kick off the scaling process (via wake_up_process), so as to prevent
re-entering the {en,de}queue code.

All of the call sites in this patch are up for discussion. Does it make
sense to track which cpus have updated statistics in enqueue_fair_task?
I chose this because I wanted to gather statistics for all cpus affected
in the event CONFIG_FAIR_GROUP_SCHED is enabled. As agreed at LPC14 the
next version of this patch will focus on the simpler case of not using
scheduler cgroups, which should remove a good chunk of this code,
including the cpumask stuff.

Also discussed at LPC14 is that fact that load_balance is a very
interesting place to do this as frequency can be considered in concert
with task placement. Please put forth any ideas on a sensible way to do
this.

Is run_rebalance_domains a logical place to change cpu frequency? What
other call sites make sense?

Even for platforms that can target a cpu frequency without sleeping
(x86, some ARM platforms with PM microcontrollers) it is currently
necessary to always kick the frequency target work out into a kthread.
This is because of the rw_sem usage in the cpufreq core which might
sleep. Replacing that lock type is probably a good idea.

Not-signed-off-by: Mike Turquette <mturque...@linaro.org>
---
 kernel/sched/fair.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1af6f6d..3619f63 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3999,6 +3999,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, 
int flags)
 {
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
+       struct cpumask update_cpus;
+
+       cpumask_clear(&update_cpus);
 
        for_each_sched_entity(se) {
                if (se->on_rq)
@@ -4028,12 +4031,27 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, 
int flags)
 
                update_cfs_shares(cfs_rq);
                update_entity_load_avg(se, 1);
+               /* track cpus that need to be re-evaluated */
+               cpumask_set_cpu(cpu_of(rq_of(cfs_rq)), &update_cpus);
        }
 
+       /* !CONFIG_FAIR_GROUP_SCHED */
        if (!se) {
                update_rq_runnable_avg(rq, rq->nr_running);
                add_nr_running(rq, 1);
+
+               /*
+                * FIXME for !CONFIG_FAIR_GROUP_SCHED it might be nice to
+                * typedef update_cpus into an int and skip all of the cpumask
+                * stuff
+                */
+               cpumask_set_cpu(cpu_of(rq), &update_cpus);
        }
+
+       if (energy_aware())
+               if (!cpumask_empty(&update_cpus))
+                       arch_eval_cpu_freq(&update_cpus);
+
        hrtick_update(rq);
 }
 
@@ -4049,6 +4067,9 @@ static void dequeue_task_fair(struct rq *rq, struct 
task_struct *p, int flags)
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
        int task_sleep = flags & DEQUEUE_SLEEP;
+       struct cpumask update_cpus;
+
+       cpumask_clear(&update_cpus);
 
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
@@ -4089,12 +4110,27 @@ static void dequeue_task_fair(struct rq *rq, struct 
task_struct *p, int flags)
 
                update_cfs_shares(cfs_rq);
                update_entity_load_avg(se, 1);
+               /* track runqueues/cpus that need to be re-evaluated */
+               cpumask_set_cpu(cpu_of(rq_of(cfs_rq)), &update_cpus);
        }
 
+       /* !CONFIG_FAIR_GROUP_SCHED */
        if (!se) {
                sub_nr_running(rq, 1);
                update_rq_runnable_avg(rq, 1);
+
+               /*
+                * FIXME for !CONFIG_FAIR_GROUP_SCHED it might be nice to
+                * typedef update_cpus into an int and skip all of the cpumask
+                * stuff
+                */
+               cpumask_set_cpu(cpu_of(rq), &update_cpus);
        }
+
+       if (energy_aware())
+               if (!cpumask_empty(&update_cpus))
+                       arch_eval_cpu_freq(&update_cpus);
+
        hrtick_update(rq);
 }
 
@@ -7536,6 +7572,9 @@ static void run_rebalance_domains(struct softirq_action 
*h)
         * stopped.
         */
        nohz_idle_balance(this_rq, idle);
+
+       if (energy_aware())
+               arch_scale_cpu_freq();
 }
 
 /*
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to