On Tue, Feb 09, 2021 at 02:16:46PM -0800, Nadav Amit wrote:
> From: Nadav Amit <na...@vmware.com>
> 
> Currently, on_each_cpu() and similar functions do not exploit the
> potential of concurrency: the function is first executed remotely and
> only then it is executed locally. Functions such as TLB flush can take
> considerable time, so this provides an opportunity for performance
> optimization.
> 
> To do so, modify smp_call_function_many_cond() to allow the callers to
> provide a function that should be executed (remotely/locally), and run
> them concurrently. Keep the other smp_call_function_many() semantics as
> they are today for backward compatibility: the called function is not
> executed locally in this case.
> 
> smp_call_function_many_cond() does not use the optimized version for a
> single remote target that smp_call_function_single() implements. For
> synchronous function call, smp_call_function_single() keeps a
> call_single_data (which is used for synchronization) on the stack.
> Interestingly, it seems that not using this optimization provides
> greater performance improvements (greater speedup with a single remote
> target than with multiple ones). Presumably, holding data structures
> that are intended for synchronization on the stack can introduce
> overheads due to TLB misses and false-sharing when the stack is used for
> other purposes.
> 
> Reviewed-by: Dave Hansen <dave.han...@linux.intel.com>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Rik van Riel <r...@surriel.com>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Cc: Andy Lutomirski <l...@kernel.org>
> Cc: Josh Poimboeuf <jpoim...@redhat.com>
> Signed-off-by: Nadav Amit <na...@vmware.com>

Kernel-CI is giving me a regression that's most likely this patch:

  https://kernelci.org/test/case/id/602bdd621c979f83faaddcc6/

I'm not sure I can explain it yet. It did get me looking at
on_each_cpu() and it appears that wants to be converted too, something
like the below perhaps.


--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -848,14 +848,7 @@ void __init smp_init(void)
  */
 void on_each_cpu(smp_call_func_t func, void *info, int wait)
 {
-       unsigned long flags;
-
-       preempt_disable();
-       smp_call_function(func, info, wait);
-       local_irq_save(flags);
-       func(info);
-       local_irq_restore(flags);
-       preempt_enable();
+       on_each_cpu_mask(cpu_online_mask, func, info, wait);
 }
 EXPORT_SYMBOL(on_each_cpu);
 
@@ -878,15 +871,7 @@ EXPORT_SYMBOL(on_each_cpu);
 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
                        void *info, bool wait)
 {
-       unsigned int scf_flags;
-
-       scf_flags = SCF_RUN_LOCAL;
-       if (wait)
-               scf_flags |= SCF_WAIT;
-
-       preempt_disable();
-       smp_call_function_many_cond(mask, func, info, scf_flags, NULL);
-       preempt_enable();
+       on_each_cpu_cond_mask(NULL, func, info, wait, mask);
 }
 EXPORT_SYMBOL(on_each_cpu_mask);
 
@@ -912,6 +897,13 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * You must not call this function with disabled interrupts or
  * from a hardware interrupt handler or from a bottom half handler.
  */
+void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
+                     void *info, bool wait)
+{
+       on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
+}
+EXPORT_SYMBOL(on_each_cpu_cond);
+
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
                           void *info, bool wait, const struct cpumask *mask)
 {
@@ -926,13 +918,6 @@ void on_each_cpu_cond_mask(smp_cond_func
 }
 EXPORT_SYMBOL(on_each_cpu_cond_mask);
 
-void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
-                     void *info, bool wait)
-{
-       on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
-}
-EXPORT_SYMBOL(on_each_cpu_cond);
-
 static void do_nothing(void *unused)
 {
 }
~

Reply via email to