Add a few knobs to poke while playing with the new code.

Not-Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 include/linux/sched/sysctl.h |    1 
 kernel/sched/fair.c          |   86 ++++++++++++++++++++++++++++++++++---------
 kernel/sched/features.h      |   10 +++++
 kernel/sysctl.c              |    7 +++
 4 files changed, 86 insertions(+), 18 deletions(-)

--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -37,6 +37,7 @@ extern unsigned int sysctl_sched_migrati
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_sched_shares_window;
+extern unsigned int sysctl_sched_shift;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *length,
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -114,6 +114,8 @@ unsigned int __read_mostly sysctl_sched_
 unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 #endif
 
+const_debug unsigned int sysctl_sched_shift = 9;
+
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
        lw->weight += inc;
@@ -5354,18 +5356,24 @@ static inline int select_idle_smt(struct
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
        struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
-       u64 avg_idle = this_rq()->avg_idle;
-       u64 avg_cost = this_sd->avg_scan_cost;
        u64 time, cost;
        s64 delta;
        int cpu, wrap;
 
-       /*
-        * Due to large variance we need a large fuzz factor; hackbench in
-        * particularly is sensitive here.
-        */
-       if ((avg_idle / 512) < avg_cost)
-               return -1;
+       if (sched_feat(AVG_CPU)) {
+               u64 avg_idle = this_rq()->avg_idle;
+               u64 avg_cost = this_sd->avg_scan_cost;
+
+               if (sched_feat(PRINT_AVG))
+                       trace_printk("idle: %Ld cost: %Ld\n", avg_idle, avg_cost);
+
+               /*
+                * Due to large variance we need a large fuzz factor; hackbench in
+                * particularly is sensitive here.
+                */
+               if ((avg_idle >> sysctl_sched_shift) < avg_cost)
+                       return -1;
+       }
 
        time = local_clock();
 
@@ -5379,6 +5387,7 @@ static int select_idle_cpu(struct task_s
        time = local_clock() - time;
        cost = this_sd->avg_scan_cost;
        delta = (s64)(time - cost) / 8;
+       /* trace_printk("time: %Ld cost: %Ld delta: %Ld\n", time, cost, delta); */
        this_sd->avg_scan_cost += delta;
 
        return cpu;
@@ -5390,7 +5399,7 @@ static int select_idle_cpu(struct task_s
 static int select_idle_sibling(struct task_struct *p, int target)
 {
        struct sched_domain *sd;
-       int i = task_cpu(p);
+       int start, i = task_cpu(p);
 
        if (idle_cpu(target))
                return target;
@@ -5401,21 +5410,62 @@ static int select_idle_sibling(struct ta
        if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
                return i;
 
+       start = target;
+       if (sched_feat(ORDER_IDLE))
+               start = per_cpu(sd_llc_id, target); /* first cpu in llc domain */
+
        sd = rcu_dereference(per_cpu(sd_llc, target));
        if (!sd)
                return target;
 
-       i = select_idle_core(p, sd, target);
-       if ((unsigned)i < nr_cpumask_bits)
-               return i;
+       if (sched_feat(OLD_IDLE)) {
+               struct sched_group *sg;
 
-       i = select_idle_cpu(p, sd, target);
-       if ((unsigned)i < nr_cpumask_bits)
-               return i;
+               for_each_lower_domain(sd) {
+                       sg = sd->groups;
+                       do {
+                               if (!cpumask_intersects(sched_group_cpus(sg),
+                                                       tsk_cpus_allowed(p)))
+                                       goto next;
+
+                               /* Ensure the entire group is idle */
+                               for_each_cpu(i, sched_group_cpus(sg)) {
+                                       if (i == target || !idle_cpu(i))
+                                               goto next;
+                               }
 
-       i = select_idle_smt(p, sd, target);
-       if ((unsigned)i < nr_cpumask_bits)
-               return i;
+                               /*
+                                * It doesn't matter which cpu we pick, the
+                                * whole group is idle.
+                                */
+                               target = cpumask_first_and(sched_group_cpus(sg),
+                                               tsk_cpus_allowed(p));
+                               goto done;
+next:
+                               sg = sg->next;
+                       } while (sg != sd->groups);
+               }
+done:
+               return target;
+       }
+
+       if (sched_feat(IDLE_CORE)) {
+               i = select_idle_core(p, sd, start);
+               if ((unsigned)i < nr_cpumask_bits)
+                       return i;
+       }
+
+       if (sched_feat(IDLE_CPU)) {
+               i = select_idle_cpu(p, sd, start);
+               if ((unsigned)i < nr_cpumask_bits)
+                       return i;
+       }
+
+       if (sched_feat(IDLE_SMT)) {
+               i = select_idle_smt(p, sd, start);
+               if ((unsigned)i < nr_cpumask_bits)
+                       return i;
+       }
 
        return target;
 }
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -69,3 +69,13 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
 SCHED_FEAT(ATTACH_AGE_LOAD, true)
 
+SCHED_FEAT(OLD_IDLE, false)
+SCHED_FEAT(ORDER_IDLE, false)
+
+SCHED_FEAT(IDLE_CORE, true)
+SCHED_FEAT(IDLE_CPU, true)
+SCHED_FEAT(AVG_CPU, true)
+SCHED_FEAT(PRINT_AVG, false)
+
+SCHED_FEAT(IDLE_SMT, true)
+
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -334,6 +334,13 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
        {
+               .procname       = "sched_shift",
+               .data           = &sysctl_sched_shift,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
                .procname       = "sched_nr_migrate",
                .data           = &sysctl_sched_nr_migrate,
                .maxlen         = sizeof(unsigned int),


Reply via email to