Sorry for the late response, but this patch is a step backward: it's back to
about 0.4 sec per CPU bring-up. This is with the possible and present masks
and isolcpus all set to 16-4095. Most of the time is spent in
register_sched_domain_sysctl(), at the call:

	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
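(The markers in the log below are my guess at the instrumentation used; they
are not part of the thread. Something like the following, placed around the
suspect call in partition_sched_domains() in kernel/sched/topology.c, with
CONFIG_PRINTK_TIME=y supplying the timestamps:)

	/* hypothetical debug markers around the expensive call: */
	printk(KERN_INFO "register_sched_domain_sysctl start\n");
	register_sched_domain_sysctl();
	printk(KERN_INFO "register_sched_domain_sysctl end\n");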
[   22.150000] ## CPU16 LIVE ##: Executing Code...
[   22.170000] partition_sched_domains start
[   22.220000] register_sched_domain_sysctl start
[   22.580000] register_sched_domain_sysctl end
[   22.580000] partition_sched_domains end

> BTW, what physical size does your toy have? I'm thinking it's less than
> multiple racks worth like the SGI systems were.

It's a single chip with 4K CPUs, capable of 400 Gbps duplex. The evaluation
board is pizza-box sized.

Thanks

> -----Original Message-----
> From: Peter Zijlstra [mailto:pet...@infradead.org]
> Sent: Thursday, August 10, 2017 6:45 PM
> To: Ofer Levi(SW) <ofe...@mellanox.com>
> Cc: ru...@rustcorp.com.au; mi...@redhat.com;
> vineet.gup...@synopsys.com; linux-kernel@vger.kernel.org; Tejun Heo
> <t...@kernel.org>
> Subject: Re: hotplug support for arch/arc/plat-eznps platform
>
> On Thu, Aug 10, 2017 at 11:19:05AM +0200, Peter Zijlstra wrote:
> > On Thu, Aug 10, 2017 at 07:40:16AM +0000, Ofer Levi(SW) wrote:
> > > Well, this definitely has pleased the little toy :) Thank you. I
> > > really appreciate your time and effort.
> > >
> > > If I may, one more newbie question: what do I need to do for the two
> > > patches to find their way into the mainline kernel?
> >
> > I'll split the first patch into two separate patches and line them up.
> >
> > I'm not sure about this last patch; I'll speak with Ingo once he's
> > back to see what would be the thing to do here.
> >
> > I suspect we can make it work. That sysctl stuff is only debug crud
> > after all, and it should never get in the way of getting work done.
>
> Can you test this instead of the second patch? It should have the same
> effect.
>
>
> ---
> Subject: sched/debug: Optimize sched_domain sysctl generation
> From: Peter Zijlstra <pet...@infradead.org>
> Date: Thu Aug 10 17:10:26 CEST 2017
>
> Currently we unconditionally destroy all sysctl bits and regenerate them
> after we've rebuilt the domains (even if that rebuild is a no-op).
>
> And since we unconditionally (re)build the sysctl for all possible CPUs,
> onlining all CPUs gets us O(n^2) time. Instead, change this to only rebuild
> the bits for CPUs we've actually installed new domains on.
>
> Reported-by: "Ofer Levi(SW)" <ofe...@mellanox.com>
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> ---
>  kernel/sched/debug.c    | 68 ++++++++++++++++++++++++++++++++++++++----------
>  kernel/sched/sched.h    |  4 ++
>  kernel/sched/topology.c |  1 +
>  3 files changed, 59 insertions(+), 14 deletions(-)
>
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -327,38 +327,78 @@ static struct ctl_table *sd_alloc_ctl_cp
>  	return table;
>  }
>
> +static cpumask_var_t sd_sysctl_cpus;
>  static struct ctl_table_header *sd_sysctl_header;
> +
>  void register_sched_domain_sysctl(void)
>  {
> -	int i, cpu_num = num_possible_cpus();
> -	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
> +	static struct ctl_table *cpu_entries;
> +	static struct ctl_table **cpu_idx;
>  	char buf[32];
> +	int i;
> +
> +	if (!cpu_entries) {
> +		cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
> +		if (!cpu_entries)
> +			return;
> +
> +		WARN_ON(sd_ctl_dir[0].child);
> +		sd_ctl_dir[0].child = cpu_entries;
> +	}
> +
> +	if (!cpu_idx) {
> +		struct ctl_table *e = cpu_entries;
> +
> +		cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
> +		if (!cpu_idx)
> +			return;
> +
> +		/* deal with sparse possible map */
> +		for_each_possible_cpu(i) {
> +			cpu_idx[i] = e;
> +			e++;
> +		}
> +	}
>
> -	WARN_ON(sd_ctl_dir[0].child);
> -	sd_ctl_dir[0].child = entry;
> +	if (!cpumask_available(sd_sysctl_cpus)) {
> +		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
> +			return;
>
> -	if (entry == NULL)
> -		return;
> +		/* init to possible to not have holes in @cpu_entries */
> +		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
> +	}
> +
> +	for_each_cpu(i, sd_sysctl_cpus) {
> +		struct ctl_table *e = cpu_idx[i];
> +
> +		if (e->child)
> +			sd_free_ctl_entry(&e->child);
> +
> +		if (!e->procname) {
> +			snprintf(buf, 32, "cpu%d", i);
> +			e->procname = kstrdup(buf, GFP_KERNEL);
> +		}
> +		e->mode = 0555;
> +		e->child = sd_alloc_ctl_cpu_table(i);
>
> -	for_each_possible_cpu(i) {
> -		snprintf(buf, 32, "cpu%d", i);
> -		entry->procname = kstrdup(buf, GFP_KERNEL);
> -		entry->mode = 0555;
> -		entry->child = sd_alloc_ctl_cpu_table(i);
> -		entry++;
> +		__cpumask_clear_cpu(i, sd_sysctl_cpus);
>  	}
>
>  	WARN_ON(sd_sysctl_header);
>  	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
>  }
>
> +void dirty_sched_domain_sysctl(int cpu)
> +{
> +	if (cpumask_available(sd_sysctl_cpus))
> +		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
> +}
> +
>  /* may be called multiple times per register */
>  void unregister_sched_domain_sysctl(void)
>  {
>  	unregister_sysctl_table(sd_sysctl_header);
>  	sd_sysctl_header = NULL;
> -	if (sd_ctl_dir[0].child)
> -		sd_free_ctl_entry(&sd_ctl_dir[0].child);
>  }
>  #endif /* CONFIG_SYSCTL */
>  #endif /* CONFIG_SMP */
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1120,11 +1120,15 @@ extern int group_balance_cpu(struct sche
>
>  #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
>  void register_sched_domain_sysctl(void);
> +void dirty_sched_domain_sysctl(int cpu);
>  void unregister_sched_domain_sysctl(void);
>  #else
>  static inline void register_sched_domain_sysctl(void)
>  {
>  }
> +static inline void dirty_sched_domain_sysctl(int cpu)
> +{
> +}
>  static inline void unregister_sched_domain_sysctl(void)
>  {
>  }
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -461,6 +461,7 @@ cpu_attach_domain(struct sched_domain *s
>  	rq_attach_root(rq, rd);
>  	tmp = rq->sd;
>  	rcu_assign_pointer(rq->sd, sd);
> +	dirty_sched_domain_sysctl(cpu);
>  	destroy_sched_domains(tmp);
>
>  	update_top_cache_domain(cpu);
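
As an aside, to make the O(n^2) vs O(n) point concrete: a standalone
userspace sketch of the dirty-bitmap pattern the patch introduces (my
illustration only; mark_dirty() and rebuild_sysctl() are stand-ins, not the
kernel functions):

	/* dirty_sketch.c -- build with: gcc -O2 -o sketch dirty_sketch.c */
	#include <stdio.h>
	#include <stdbool.h>

	#define NR_CPUS 16

	static bool dirty[NR_CPUS];	/* plays the role of sd_sysctl_cpus */
	static int rebuilds;		/* counts per-CPU table (re)builds */

	/* plays the role of dirty_sched_domain_sysctl(): called when a CPU
	 * gets a new sched domain attached */
	static void mark_dirty(int cpu)
	{
		dirty[cpu] = true;
	}

	/* plays the role of register_sched_domain_sysctl(): rebuilds only
	 * the entries for CPUs whose domains actually changed */
	static void rebuild_sysctl(void)
	{
		for (int cpu = 0; cpu < NR_CPUS; cpu++) {
			if (!dirty[cpu])
				continue;
			rebuilds++;	/* sd_alloc_ctl_cpu_table() would go here */
			dirty[cpu] = false;
		}
	}

	int main(void)
	{
		/* online CPUs one at a time, as during boot */
		for (int cpu = 0; cpu < NR_CPUS; cpu++) {
			mark_dirty(cpu);
			rebuild_sysctl();
		}
		/* the old code rebuilt all NR_CPUS entries on every online,
		 * i.e. NR_CPUS * NR_CPUS builds; the bitmap does NR_CPUS */
		printf("builds with dirty bitmap: %d (unconditional: %d)\n",
		       rebuilds, NR_CPUS * NR_CPUS);
		return 0;
	}

The design point is that cpu_attach_domain() only marks the CPU and defers
the sysctl work, so each online pays for its own entry rather than for all
possible CPUs.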