These changes make the scheduler honor the cpus_preferred mask when selecting a
CPU: the fair-class idle search scans the preferred CPUs first and falls back to
the remaining allowed CPUs, while the RT and deadline paths pick from
cpus_preferred. Keep in mind that when the system call updates the cpus_allowed
mask, cpus_preferred and cpus_allowed become the same.
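
For illustration only (not part of the patch): a minimal user-space sketch of
the preferred-first, allowed-fallback search that the fair.c hunks below
implement. Masks are plain 64-bit words here, and cpu_is_idle(), pick_idle()
and select_cpu() are made-up stand-ins for the kernel's idle checks, scan
helpers and cpumask API.

/*
 * Illustration only -- not kernel code.  Models the two-pass search:
 * look for an idle CPU in the preferred mask first, then fall back to
 * the remaining allowed CPUs.
 */
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's idle_cpu(): pretend even-numbered CPUs are idle. */
static int cpu_is_idle(int cpu)
{
        return (cpu % 2) == 0;
}

/* Return the first idle CPU set in @mask, or -1 if none. */
static int pick_idle(uint64_t mask)
{
        for (int cpu = 0; cpu < 64; cpu++)
                if ((mask & (1ULL << cpu)) && cpu_is_idle(cpu))
                        return cpu;
        return -1;
}

static int select_cpu(uint64_t allowed, uint64_t preferred)
{
        int cpu = pick_idle(allowed & preferred);

        /* Found one, or there is nothing outside the preferred set to try. */
        if (cpu >= 0 || preferred == allowed)
                return cpu;

        /* Fall back to allowed-but-not-preferred CPUs. */
        return pick_idle(allowed & ~preferred);
}

int main(void)
{
        /* CPUs 0-3 allowed, CPUs 1 and 3 preferred (both busy here) -> picks CPU 0. */
        printf("picked CPU %d\n", select_cpu(0xfULL, 0xaULL));
        return 0;
}

The select_idle_core()/select_idle_smt()/select_idle_cpu() changes below follow
the same shape: scan the preferred subset first, and only if that finds nothing
and the two masks differ, scan the allowed-but-not-preferred CPUs.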

Signed-off-by: Rohit Jain <rohit.k.j...@oracle.com>
---
 kernel/sched/cpudeadline.c |   4 +-
 kernel/sched/cpupri.c      |   4 +-
 kernel/sched/fair.c        | 116 +++++++++++++++++++++++++++++++++------------
 3 files changed, 91 insertions(+), 33 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 8d9562d..32135b9 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -127,13 +127,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
        const struct sched_dl_entity *dl_se = &p->dl;
 
        if (later_mask &&
-           cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+           cpumask_and(later_mask, cp->free_cpus, &p->cpus_preferred)) {
                return 1;
        } else {
                int best_cpu = cpudl_maximum(cp);
                WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
 
-               if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+               if (cpumask_test_cpu(best_cpu, &p->cpus_preferred) &&
                    dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
                        if (later_mask)
                                cpumask_set_cpu(best_cpu, later_mask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 2511aba..9641b8d 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
                if (skip)
                        continue;
 
-               if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+               if (cpumask_any_and(&p->cpus_preferred, vec->mask) >= nr_cpu_ids)
                        continue;
 
                if (lowest_mask) {
-                       cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+                       cpumask_and(lowest_mask, &p->cpus_preferred, vec->mask);
 
                        /*
                         * We have to ensure that we have at least one bit
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eca6a57..35e73c7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5805,7 +5805,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
                /* Skip over this group if it has no CPUs allowed */
                if (!cpumask_intersects(sched_group_span(group),
-                                       &p->cpus_allowed))
+                                       &p->cpus_preferred))
                        continue;
 
                local_group = cpumask_test_cpu(this_cpu,
@@ -5925,7 +5925,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
                return cpumask_first(sched_group_span(group));
 
        /* Traverse only the allowed CPUs */
-       for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
+       for_each_cpu_and(i, sched_group_span(group), &p->cpus_preferred) {
                if (idle_cpu(i)) {
                        struct rq *rq = cpu_rq(i);
                        struct cpuidle_state *idle = idle_get_state(rq);
@@ -6011,6 +6011,27 @@ void __update_idle_core(struct rq *rq)
        rcu_read_unlock();
 }
 
+static inline int
+scan_cpu_mask_for_idle_cores(struct cpumask *cpus, int target)
+{
+       int core, cpu;
+
+       for_each_cpu_wrap(core, cpus, target) {
+               bool idle = true;
+
+               for_each_cpu(cpu, cpu_smt_mask(core)) {
+                       cpumask_clear_cpu(cpu, cpus);
+                       if (!idle_cpu(cpu))
+                               idle = false;
+               }
+
+               if (idle)
+                       return core;
+       }
+
+       return -1;
+}
+
 /*
  * Scan the entire LLC domain for idle cores; this dynamically switches off if
  * there are no idle cores left in the system; tracked through
@@ -6019,7 +6040,8 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-       int core, cpu;
+       struct cpumask *pcpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+       int core;
 
        if (!static_branch_likely(&sched_smt_present))
                return -1;
@@ -6028,20 +6050,21 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
                return -1;
 
        cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+       cpumask_and(pcpus, cpus, &p->cpus_preferred);
+       core = scan_cpu_mask_for_idle_cores(pcpus, target);
 
-       for_each_cpu_wrap(core, cpus, target) {
-               bool idle = true;
+       if (core >= 0)
+               return core;
 
-               for_each_cpu(cpu, cpu_smt_mask(core)) {
-                       cpumask_clear_cpu(cpu, cpus);
-                       if (!idle_cpu(cpu))
-                               idle = false;
-               }
+       if (cpumask_equal(cpus, pcpus))
+               goto out;
 
-               if (idle)
-                       return core;
-       }
+       cpumask_andnot(cpus, cpus, pcpus);
+       core = scan_cpu_mask_for_idle_cores(cpus, target);
 
+       if (core >= 0)
+               return core;
+out:
        /*
         * Failed to find an idle core; stop looking for one.
         */
@@ -6050,24 +6073,40 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
        return -1;
 }
 
+static inline int
+scan_cpu_mask_for_idle_smt(struct cpumask *cpus, int target)
+{
+       int cpu;
+
+       for_each_cpu(cpu, cpu_smt_mask(target)) {
+               if (!cpumask_test_cpu(cpu, cpus))
+                       continue;
+               if (idle_cpu(cpu))
+                       return cpu;
+       }
+
+       return -1;
+}
+
 /*
  * Scan the local SMT mask for idle CPUs.
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        int cpu;
 
        if (!static_branch_likely(&sched_smt_present))
                return -1;
 
-       for_each_cpu(cpu, cpu_smt_mask(target)) {
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
-               if (idle_cpu(cpu))
-                       return cpu;
-       }
+       cpu = scan_cpu_mask_for_idle_smt(&p->cpus_preferred, target);
 
-       return -1;
+       if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
+               return cpu;
+
+       cpumask_andnot(cpus, &p->cpus_allowed, &p->cpus_preferred);
+
+       return scan_cpu_mask_for_idle_smt(cpus, target);
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6084,6 +6123,24 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 
 #endif /* CONFIG_SCHED_SMT */
 
+static inline int
+scan_cpu_mask_for_idle_cpu(struct cpumask *cpus, int target,
+                          struct sched_domain *sd, int nr)
+{
+       int cpu;
+
+       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+               if (!--nr)
+                       return -1;
+               if (!cpumask_test_cpu(cpu, cpus))
+                       continue;
+               if (idle_cpu(cpu))
+                       return cpu;
+       }
+
+       return -1;
+}
+
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -6092,6 +6149,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
        struct sched_domain *this_sd;
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        u64 avg_cost, avg_idle;
        u64 time, cost;
        s64 delta;
@@ -6121,15 +6179,15 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
        time = local_clock();
 
-       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
-               if (!--nr)
-                       return -1;
-               if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-                       continue;
-               if (idle_cpu(cpu))
-                       break;
-       }
+       cpu = scan_cpu_mask_for_idle_cpu(&p->cpus_preferred, target, sd, nr);
+
+       if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
+               goto out;
 
+       cpumask_andnot(cpus, &p->cpus_allowed, &p->cpus_preferred);
+
+       cpu = scan_cpu_mask_for_idle_cpu(cpus, target, sd, nr);
+out:
        time = local_clock() - time;
        cost = this_sd->avg_scan_cost;
        delta = (s64)(time - cost) / 8;
@@ -6279,7 +6337,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        if (sd_flag & SD_BALANCE_WAKE) {
                record_wakee(p);
                want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
-                             && cpumask_test_cpu(cpu, &p->cpus_allowed);
+                             && cpumask_test_cpu(cpu, &p->cpus_preferred);
        }
 
        rcu_read_lock();
-- 
2.7.4
