Sorry for the late response, but this patch is a regression: it's back to
about 0.4 sec per CPU bring-up.
This is with possible, present and isolcpus all set to 16-4095.
Most of the time is spent in register_sched_domain_sysctl(), in the call
sd_sysctl_header = register_sysctl_table(sd_ctl_root);

[   22.150000] ## CPU16 LIVE ##: Executing Code...
[   22.170000] partition_sched_domains start
[   22.220000] register_sched_domain_sysctl start
[   22.580000] register_sched_domain_sysctl end
[   22.580000] partition_sched_domains end


> BTW, what physical size does your toy have? I'm thinking its less than
> multiple racks worth like the SGI systems were.
It's a single chip with 4K CPUs, capable of 400 Gbps duplex. The evaluation
board is pizza-box size.

Thanks


> -----Original Message-----
> From: Peter Zijlstra [mailto:pet...@infradead.org]
> Sent: Thursday, August 10, 2017 6:45 PM
> To: Ofer Levi(SW) <ofe...@mellanox.com>
> Cc: ru...@rustcorp.com.au; mi...@redhat.com;
> vineet.gup...@synopsys.com; linux-kernel@vger.kernel.org; Tejun Heo
> <t...@kernel.org>
> Subject: Re: hotplug support for arch/arc/plat-eznps platform
> 
> On Thu, Aug 10, 2017 at 11:19:05AM +0200, Peter Zijlstra wrote:
> > On Thu, Aug 10, 2017 at 07:40:16AM +0000, Ofer Levi(SW) wrote:
> > > Well, this definitely have pleased the little toy :) Thank you. I
> > > really appreciate your time and effort.
> > >
> > > If I may, one more newbie question. What do I need to do for the two
> > > patches to find their way into formal kernel code?
> >
> > I'll split the first patch into two separate patches and line them up.
> >
> > I'm not sure about this last patch, I'll speak with Ingo once he's
> > back to see what would be the thing to do here.
> >
> > I suspect we can make it work, that sysctl stuff is only debug crud
> > after all and that should never get in the way of getting work done.
> 
> Can you test this instead of the second patch? It should have the same
> effect.
> 
> 
> ---
> Subject: sched/debug: Optimize sched_domain sysctl generation
> From: Peter Zijlstra <pet...@infradead.org>
> Date: Thu Aug 10 17:10:26 CEST 2017
> 
> Currently we unconditionally destroy all sysctl bits and regenerate them after
> we've rebuilt the domains (even if that rebuild is a no-op).
> 
> And since we unconditionally (re)build the sysctl for all possible CPUs,
> onlining all CPUs gets us O(n^2) time. Instead change this to only rebuild the
> bits for CPUs we've actually installed new domains on.
> 
> Reported-by: "Ofer Levi(SW)" <ofe...@mellanox.com>
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> ---
>  kernel/sched/debug.c    |   68
> ++++++++++++++++++++++++++++++++++++++----------
>  kernel/sched/sched.h    |    4 ++
>  kernel/sched/topology.c |    1
>  3 files changed, 59 insertions(+), 14 deletions(-)
> 
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -327,38 +327,78 @@ static struct ctl_table *sd_alloc_ctl_cp
>       return table;
>  }
> 
> +static cpumask_var_t sd_sysctl_cpus;
>  static struct ctl_table_header *sd_sysctl_header;
> +
>  void register_sched_domain_sysctl(void)
>  {
> -     int i, cpu_num = num_possible_cpus();
> -     struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
> +     static struct ctl_table *cpu_entries;
> +     static struct ctl_table **cpu_idx;
>       char buf[32];
> +     int i;
> +
> +     if (!cpu_entries) {
> +             cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
> +             if (!cpu_entries)
> +                     return;
> +
> +             WARN_ON(sd_ctl_dir[0].child);
> +             sd_ctl_dir[0].child = cpu_entries;
> +     }
> +
> +     if (!cpu_idx) {
> +             struct ctl_table *e = cpu_entries;
> +
> +             cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*),
> GFP_KERNEL);
> +             if (!cpu_idx)
> +                     return;
> +
> +             /* deal with sparse possible map */
> +             for_each_possible_cpu(i) {
> +                     cpu_idx[i] = e;
> +                     e++;
> +             }
> +     }
> 
> -     WARN_ON(sd_ctl_dir[0].child);
> -     sd_ctl_dir[0].child = entry;
> +     if (!cpumask_available(sd_sysctl_cpus)) {
> +             if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
> +                     return;
> 
> -     if (entry == NULL)
> -             return;
> +             /* init to possible to not have holes in @cpu_entries */
> +             cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
> +     }
> +
> +     for_each_cpu(i, sd_sysctl_cpus) {
> +             struct ctl_table *e = cpu_idx[i];
> +
> +             if (e->child)
> +                     sd_free_ctl_entry(&e->child);
> +
> +             if (!e->procname) {
> +                     snprintf(buf, 32, "cpu%d", i);
> +                     e->procname = kstrdup(buf, GFP_KERNEL);
> +             }
> +             e->mode = 0555;
> +             e->child = sd_alloc_ctl_cpu_table(i);
> 
> -     for_each_possible_cpu(i) {
> -             snprintf(buf, 32, "cpu%d", i);
> -             entry->procname = kstrdup(buf, GFP_KERNEL);
> -             entry->mode = 0555;
> -             entry->child = sd_alloc_ctl_cpu_table(i);
> -             entry++;
> +             __cpumask_clear_cpu(i, sd_sysctl_cpus);
>       }
> 
>       WARN_ON(sd_sysctl_header);
>       sd_sysctl_header = register_sysctl_table(sd_ctl_root);
>  }
> 
> +void dirty_sched_domain_sysctl(int cpu) {
> +     if (cpumask_available(sd_sysctl_cpus))
> +             __cpumask_set_cpu(cpu, sd_sysctl_cpus); }
> +
>  /* may be called multiple times per register */  void
> unregister_sched_domain_sysctl(void)
>  {
>       unregister_sysctl_table(sd_sysctl_header);
>       sd_sysctl_header = NULL;
> -     if (sd_ctl_dir[0].child)
> -             sd_free_ctl_entry(&sd_ctl_dir[0].child);
>  }
>  #endif /* CONFIG_SYSCTL */
>  #endif /* CONFIG_SMP */
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1120,11 +1120,15 @@ extern int group_balance_cpu(struct sche
> 
>  #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)  void
> register_sched_domain_sysctl(void);
> +void dirty_sched_domain_sysctl(int cpu);
>  void unregister_sched_domain_sysctl(void);
>  #else
>  static inline void register_sched_domain_sysctl(void)
>  {
>  }
> +static inline void dirty_sched_domain_sysctl(int cpu) { }
>  static inline void unregister_sched_domain_sysctl(void)
>  {
>  }
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -461,6 +461,7 @@ cpu_attach_domain(struct sched_domain *s
>       rq_attach_root(rq, rd);
>       tmp = rq->sd;
>       rcu_assign_pointer(rq->sd, sd);
> +     dirty_sched_domain_sysctl(cpu);
>       destroy_sched_domains(tmp);
> 
>       update_top_cache_domain(cpu);

Reply via email to