Regardless of the 'wait' argument, smp_call_function_many() must spin if any of the target CPUs still have their csd locked from a previous call that has not yet been processed. This can cause high tail latencies, e.g. when some of the target CPUs are running functions that disable interrupts for a long time; getrusage() is one possible culprit.
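
For reference, the spin happens when (re)acquiring the target CPU's csd lock. Without CONFIG_CSD_LOCK_WAIT_DEBUG, the relevant kernel/smp.c code is roughly the following paraphrase (comments condensed; this sketch is for context only and is not part of the patch):

	static __always_inline void csd_lock_wait(call_single_data_t *csd)
	{
		/* Spin until the target CPU has consumed the previous call. */
		smp_cond_load_acquire(&csd->node.u_flags,
				      !(VAL & CSD_FLAG_LOCK));
	}

	static __always_inline void csd_lock(call_single_data_t *csd)
	{
		csd_lock_wait(csd);	/* may spin behind a slow target CPU */
		csd->node.u_flags |= CSD_FLAG_LOCK;

		/* Order the flag store before subsequent stores to the csd. */
		smp_wmb();
	}
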
Here we introduce a variant, __smp_call_function_many(), that adds a third 'best_effort' mode to the two existing ones (nowait, wait). In best-effort mode the call skips CPUs whose csd is busy; if any CPU is skipped, it returns -EBUSY and the set of busy CPUs in *mask. This allows the caller to decide how to proceed, e.g. retry at a later time, use a private csd, etc.

The new function is a compromise to avoid touching existing callers of smp_call_function_many(). If the feature is considered interesting, we could even replace the 'wait' argument with a ternary 'mode' in all smp_call_function_*() and derived methods.
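
A sketch of the intended use (illustrative only, not part of this patch; do_flush(), flush_others_best_effort() and the scratch mask are made-up names): a caller can hit all other online CPUs without ever blocking behind a busy csd:

	/* Illustrative caller, not part of this patch. */
	static void do_flush(void *info)
	{
		/* per-CPU work, runs on each target CPU */
	}

	static void flush_others_best_effort(void *info)
	{
		cpumask_var_t scratch;

		if (!alloc_cpumask_var(&scratch, GFP_KERNEL))
			return;

		preempt_disable();
		cpumask_copy(scratch, cpu_online_mask);
		cpumask_clear_cpu(smp_processor_id(), scratch);

		/* Skip, rather than spin on, CPUs whose csd is busy. */
		if (__smp_call_function_many(scratch, do_flush, info,
					     SMP_CFM_BEST_EFFORT) == -EBUSY) {
			/*
			 * 'scratch' now holds the skipped CPUs: retry them
			 * later, use a private csd, or simply drop them.
			 */
		}
		preempt_enable();
		free_cpumask_var(scratch);
	}
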
Signed-off-by: Luigi Rizzo <lri...@google.com>
---
 include/linux/smp.h | 10 ++++++
 kernel/smp.c        | 75 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 70c6f6284dcf..5c6c7d3e1f19 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -75,6 +75,11 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 
 int smp_call_function_single_async(int cpu, call_single_data_t *csd);
 
+/* Modes for __smp_call_function_many() */
+#define SMP_CFM_NOWAIT		0
+#define SMP_CFM_WAIT		1
+#define SMP_CFM_BEST_EFFORT	2
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
@@ -120,6 +125,8 @@ extern void smp_cpus_done(unsigned int max_cpus);
 void smp_call_function(smp_call_func_t func, void *info, int wait);
 void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func,
 			    void *info, bool wait);
+int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
+			     void *info, int mode);
 
 int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
 			  void *info, int wait);
@@ -170,6 +177,9 @@ static inline void smp_send_reschedule(int cpu) { }
 #define smp_prepare_boot_cpu()			do {} while (0)
 #define smp_call_function_many(mask, func, info, wait) \
 			(up_smp_call_function(func, info))
+#define __smp_call_function_many(mask, func, info, mode) \
+			(up_smp_call_function(func, info), 0)
+
 static inline void call_function_init(void) { }
 
 static inline int
diff --git a/kernel/smp.c b/kernel/smp.c
index aeb0adfa0606..75155875fadc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -242,6 +242,18 @@ static __always_inline void csd_lock(call_single_data_t *csd)
 	smp_wmb();
 }
 
+static __always_inline bool csd_trylock(call_single_data_t *csd)
+{
+	unsigned int flags = READ_ONCE(csd->node.u_flags);
+
+	if (flags & CSD_FLAG_LOCK)
+		return false;
+	csd->node.u_flags |= CSD_FLAG_LOCK;
+	/* See csd_lock() */
+	smp_wmb();
+	return true;
+}
+
 static __always_inline void csd_unlock(call_single_data_t *csd)
 {
 	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
@@ -608,12 +620,14 @@ int smp_call_function_any(const struct cpumask *mask,
 }
 EXPORT_SYMBOL_GPL(smp_call_function_any);
 
-static void smp_call_function_many_cond(const struct cpumask *mask,
-					smp_call_func_t func, void *info,
-					bool wait, smp_cond_func_t cond_func)
+static struct cpumask *smp_call_function_many_cond(const struct cpumask *mask,
+						   smp_call_func_t func,
+						   void *info, int mode,
+						   smp_cond_func_t cond_func)
 {
 	struct call_function_data *cfd;
 	int cpu, next_cpu, this_cpu = smp_processor_id();
+	bool busy = false, wait = (mode == SMP_CFM_WAIT);
 
 	/*
 	 * Can deadlock when called with interrupts disabled.
@@ -639,18 +653,18 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
 	/* No online cpus?  We're done. */
 	if (cpu >= nr_cpu_ids)
-		return;
+		return NULL;
 
 	/* Do we have another CPU which isn't us? */
 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
 	if (next_cpu == this_cpu)
 		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
 
-	/* Fastpath: do that cpu by itself. */
-	if (next_cpu >= nr_cpu_ids) {
+	/* Fastpath: if not best-effort, do that cpu by itself. */
+	if (next_cpu >= nr_cpu_ids && mode != SMP_CFM_BEST_EFFORT) {
 		if (!cond_func || cond_func(cpu, info))
 			smp_call_function_single(cpu, func, info, wait);
-		return;
+		return NULL;
 	}
 
 	cfd = this_cpu_ptr(&cfd_data);
@@ -660,7 +674,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
 	/* Some callers race with other cpus changing the passed mask */
 	if (unlikely(!cpumask_weight(cfd->cpumask)))
-		return;
+		return NULL;
 
 	cpumask_clear(cfd->cpumask_ipi);
 	for_each_cpu(cpu, cfd->cpumask) {
@@ -669,9 +683,17 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 		if (cond_func && !cond_func(cpu, info))
 			continue;
 
-		csd_lock(csd);
-		if (wait)
-			csd->node.u_flags |= CSD_TYPE_SYNC;
+		if (mode == SMP_CFM_BEST_EFFORT) {
+			if (!csd_trylock(csd)) {
+				cpumask_clear_cpu(cpu, cfd->cpumask);
+				busy = true;
+				continue;
+			}
+		} else {
+			csd_lock(csd);
+			if (wait)
+				csd->node.u_flags |= CSD_TYPE_SYNC;
+		}
 		csd->func = func;
 		csd->info = info;
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
@@ -693,8 +715,32 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 			csd_lock_wait(csd);
 		}
 	}
+	return busy ? cfd->cpumask : NULL;
 }
 
+/**
+ * __smp_call_function_many(): extended smp_call_function_many().
+ * Same constraints; the bool 'wait' is replaced by a ternary 'mode'.
+ * @mode == SMP_CFM_NOWAIT: same as wait == false, returns 0;
+ * @mode == SMP_CFM_WAIT: same as wait == true, returns 0;
+ * @mode == SMP_CFM_BEST_EFFORT: skips busy CPUs; returns 0 with *mask
+ * unmodified if none skipped, else -EBUSY with *mask = skipped CPUs.
+ */
+int __smp_call_function_many(struct cpumask *mask, smp_call_func_t func,
+			     void *info, int mode)
+{
+	struct cpumask *ret = smp_call_function_many_cond(mask, func, info,
+							  mode, NULL);
+
+	if (!ret)
+		return 0;
+	cpumask_andnot(mask, mask, ret);
+	cpumask_and(mask, mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), mask);
+	return -EBUSY;
+}
+EXPORT_SYMBOL(__smp_call_function_many);
+
 /**
  * smp_call_function_many(): Run a function on a set of other CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
@@ -712,7 +758,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 void smp_call_function_many(const struct cpumask *mask,
 			    smp_call_func_t func, void *info, bool wait)
 {
-	smp_call_function_many_cond(mask, func, info, wait, NULL);
+	const int mode = wait ? SMP_CFM_WAIT : SMP_CFM_NOWAIT;
+
+	smp_call_function_many_cond(mask, func, info, mode, NULL);
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
@@ -898,9 +946,10 @@ EXPORT_SYMBOL(on_each_cpu_mask);
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 			   void *info, bool wait, const struct cpumask *mask)
 {
+	const int mode = wait ? SMP_CFM_WAIT : SMP_CFM_NOWAIT;
 	int cpu = get_cpu();
 
-	smp_call_function_many_cond(mask, func, info, wait, cond_func);
+	smp_call_function_many_cond(mask, func, info, mode, cond_func);
 	if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
 		unsigned long flags;
 
-- 
2.31.1.368.gbe11c130af-goog