On Mon, Jul 08, 2019 at 03:43:55PM +0200, Thomas Gleixner wrote:
> Revaluating the bitmap wheight of the online cpus bitmap in every

s/wheight/weight/?

> invocation of num_online_cpus() over and over is a pretty useless
> exercise. Especially when num_online_cpus() is used in code paths like the
> IPI delivery of x86 or the membarrier code.
> 
> Cache the number of online CPUs in the core and just return the cached
> variable.

I do like this and the comments on limited guarantees make sense.
One suggestion for saving a few lines below, but either way:

Acked-by: Paul E. McKenney <paul...@linux.ibm.com>

> Signed-off-by: Thomas Gleixner <t...@linutronix.de>
> ---
> V2: Use READ/WRITE_ONCE() and add comment what it actually achieves. Remove
>     the bogus lockdep assert in the write path as the caller cannot hold the
>     lock. It's a task on the plugged CPU which is not the controlling task.
> ---
>  include/linux/cpumask.h |   26 +++++++++++++++++---------
>  kernel/cpu.c            |   22 ++++++++++++++++++++++
>  2 files changed, 39 insertions(+), 9 deletions(-)
> 
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -95,8 +95,23 @@ extern struct cpumask __cpu_active_mask;
>  #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
>  #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
>  
> +extern unsigned int __num_online_cpus;
> +
>  #if NR_CPUS > 1
> -#define num_online_cpus()    cpumask_weight(cpu_online_mask)
> +/**
> + * num_online_cpus() - Read the number of online CPUs
> + *
> + * READ_ONCE() protects against theoretical load tearing and prevents
> + * the compiler from reloading the value in a function or loop.
> + *
> + * Even with that, this interface gives only a momentary snapshot and is
> + * not protected against concurrent CPU hotplug operations unless invoked
> + * from a cpuhp_lock held region.
> + */
> +static inline unsigned int num_online_cpus(void)
> +{
> +     return READ_ONCE(__num_online_cpus);
> +}
>  #define num_possible_cpus()  cpumask_weight(cpu_possible_mask)
>  #define num_present_cpus()   cpumask_weight(cpu_present_mask)
>  #define num_active_cpus()    cpumask_weight(cpu_active_mask)
> @@ -805,14 +820,7 @@ set_cpu_present(unsigned int cpu, bool p
>               cpumask_clear_cpu(cpu, &__cpu_present_mask);
>  }
>  
> -static inline void
> -set_cpu_online(unsigned int cpu, bool online)
> -{
> -     if (online)
> -             cpumask_set_cpu(cpu, &__cpu_online_mask);
> -     else
> -             cpumask_clear_cpu(cpu, &__cpu_online_mask);
> -}
> +void set_cpu_online(unsigned int cpu, bool online);
>  
>  static inline void
>  set_cpu_active(unsigned int cpu, bool active)
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -2288,6 +2288,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
>  struct cpumask __cpu_active_mask __read_mostly;
>  EXPORT_SYMBOL(__cpu_active_mask);
>  
> +unsigned int __num_online_cpus __read_mostly;
> +EXPORT_SYMBOL(__num_online_cpus);
> +
>  void init_cpu_present(const struct cpumask *src)
>  {
>       cpumask_copy(&__cpu_present_mask, src);
> @@ -2303,6 +2306,25 @@ void init_cpu_online(const struct cpumas
>       cpumask_copy(&__cpu_online_mask, src);
>  }
>  
> +void set_cpu_online(unsigned int cpu, bool online)
> +{
> +     int adj = 0;
> +
> +     if (online) {
> +             if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
> +                     adj = 1;
> +     } else {
> +             if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
> +                     adj = -1;
> +     }
> +     /*
> +      * WRITE_ONCE() protects only against the theoretical stupidity of
> +      * a compiler to tear the store, but won't protect readers which
> +      * are not serialized against concurrent hotplug operations.
> +      */
> +     WRITE_ONCE(__num_online_cpus, __num_online_cpus + adj);

        WRITE_ONCE(__num_online_cpus, cpumask_weight(&__cpu_online_mask));

Then "adj" can be dispensed with, and the old non-value-returning atomic
updates can be used on __cpu_online_mask.  Or is someone now depending
on full ordering from set_cpu_online() or some such?

> +}
> +
>  /*
>   * Activate the first processor.
>   */
> 

Reply via email to