From: Rik van Riel <r...@redhat.com>

Ensure that cpus specified with the isolcpus= boot commandline
option stay outside of the load balancing in the kernel scheduler.

Operations like load balancing can introduce unwanted latencies,
which is exactly what the isolcpus= commandline is there to prevent.

Previously, simply creating a new cpuset, without even touching the
cpuset.cpus field inside the new cpuset, would undo the effects of
isolcpus=, by creating a scheduler domain spanning the whole system,
and setting up load balancing inside that domain. The cpuset root
cpuset.cpus file is read-only, so there was not even a way to undo
that effect.

This does not impact the majority of cpusets users, since isolcpus=
is a fairly specialized feature used for realtime purposes.

Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Clark Williams <willi...@redhat.com>
Cc: Li Zefan <lize...@huawei.com>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Luiz Capitulino <lcapitul...@redhat.com>
Cc: Mike Galbraith <umgwanakikb...@gmail.com>
Cc: cgro...@vger.kernel.org
Signed-off-by: Rik van Riel <r...@redhat.com>
Tested-by: David Rientjes <rient...@google.com>
---
 include/linux/sched.h |  2 ++
 kernel/cpuset.c       | 13 +++++++++++--
 kernel/sched/core.c   |  2 +-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432e14ff..aeae02435717 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1038,6 +1038,8 @@ static inline struct cpumask *sched_domain_span(struct 
sched_domain *sd)
 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
                                    struct sched_domain_attr *dattr_new);
 
+extern cpumask_var_t cpu_isolated_map;
+
 /* Allocate an array of sched domains, for partition_sched_domains(). */
 cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
 void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1d1fe9361d29..b544e5229d99 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -625,6 +625,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
        int csn;                /* how many cpuset ptrs in csa so far */
        int i, j, k;            /* indices for partition finding loops */
        cpumask_var_t *doms;    /* resulting partition; i.e. sched domains */
+       cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
        struct sched_domain_attr *dattr;  /* attributes for custom domains */
        int ndoms = 0;          /* number of sched domains in result */
        int nslot;              /* next empty doms[] struct cpumask slot */
@@ -634,6 +635,10 @@ static int generate_sched_domains(cpumask_var_t **domains,
        dattr = NULL;
        csa = NULL;
 
+       if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
+               goto done;
+       cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+
        /* Special case for the 99% of systems with one, full, sched domain */
        if (is_sched_load_balance(&top_cpuset)) {
                ndoms = 1;
@@ -646,7 +651,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
                        *dattr = SD_ATTR_INIT;
                        update_domain_attr_tree(dattr, &top_cpuset);
                }
-               cpumask_copy(doms[0], top_cpuset.effective_cpus);
+               cpumask_and(doms[0], top_cpuset.effective_cpus,
+                                    non_isolated_cpus);
 
                goto done;
        }
@@ -669,7 +675,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
                 * the corresponding sched domain.
                 */
                if (!cpumask_empty(cp->cpus_allowed) &&
-                   !is_sched_load_balance(cp))
+                   !(is_sched_load_balance(cp) &&
+                     cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
                        continue;
 
                if (is_sched_load_balance(cp))
@@ -751,6 +758,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 
                        if (apn == b->pn) {
                                cpumask_or(dp, dp, b->effective_cpus);
+                               cpumask_and(dp, dp, non_isolated_cpus);
                                if (dattr)
                                        update_domain_attr_tree(dattr + nslot, 
b);
 
@@ -763,6 +771,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
        BUG_ON(nslot != ndoms);
 
 done:
+       free_cpumask_var(non_isolated_cpus);
        kfree(csa);
 
        /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e8a345..3db1beace19b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5812,7 +5812,7 @@ cpu_attach_domain(struct sched_domain *sd, struct 
root_domain *rd, int cpu)
 }
 
 /* cpus with isolated domains */
-static cpumask_var_t cpu_isolated_map;
+cpumask_var_t cpu_isolated_map;
 
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to