> On Jul 3, 2019, at 3:54 AM, Thomas Gleixner <[email protected]> wrote:
>
> Broadcast now depends on the fact that all present CPUs have been booted
> once so handling broadcast IPIs is not doing any harm. In case that a CPU
> is offline, it does not react to regular IPIs and the NMI handler returns
> early.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
>  arch/x86/kernel/apic/ipi.c | 12 +++---------
>  1 file changed, 3 insertions(+), 9 deletions(-)
>
> --- a/arch/x86/kernel/apic/ipi.c
> +++ b/arch/x86/kernel/apic/ipi.c
> @@ -8,13 +8,7 @@
>  DEFINE_STATIC_KEY_FALSE(apic_use_ipi_shorthand);
>
>  #ifdef CONFIG_SMP
> -#ifdef CONFIG_HOTPLUG_CPU
> -#define DEFAULT_SEND_IPI        (1)
> -#else
> -#define DEFAULT_SEND_IPI        (0)
> -#endif
> -
> -static int apic_ipi_shorthand_off __ro_after_init = DEFAULT_SEND_IPI;
> +static int apic_ipi_shorthand_off __ro_after_init;
>
>  static __init int apic_ipi_shorthand(char *str)
>  {
> @@ -250,7 +244,7 @@ void default_send_IPI_allbutself(int vec
>         if (num_online_cpus() < 2)
>                 return;
>
> -       if (apic_ipi_shorthand_off || vector == NMI_VECTOR) {
> +       if (static_branch_likely(&apic_use_ipi_shorthand)) {
>                 apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
>         } else {
>                 __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
> @@ -259,7 +253,7 @@
>
>  void default_send_IPI_all(int vector)
>  {
> -       if (apic_ipi_shorthand_off || vector == NMI_VECTOR) {
> +       if (static_branch_likely(&apic_use_ipi_shorthand)) {
>                 apic->send_IPI_mask(cpu_online_mask, vector);
>         } else {
>                 __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);

It may be better to check the static key in native_send_call_func_ipi() (and
in other callers, if there are any) and to remove all the other checks in
default_send_IPI_all(), x2apic_send_IPI_mask_allbutself(), etc. This would
allow removing potentially unnecessary checks from
native_send_call_func_ipi().
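For illustration, the caller-side check could look along these lines
(untested sketch, just to show the idea; the exact placement is of course up
for discussion):

void native_send_call_func_ipi(const struct cpumask *mask)
{
        /*
         * Shorthands disabled: no "all online CPUs but self" detection
         * is needed, since a targeted IPI must be sent either way.
         */
        if (!static_branch_likely(&apic_use_ipi_shorthand)) {
                apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
                return;
        }

        /* ... detect allbutself and use apic->send_IPI_allbutself() ... */
}

The low-level senders could then use the shorthand unconditionally.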
I also have this patch, not yet sent, which slightly improves the test in
native_send_call_func_ipi():

-- >8 --

From: Nadav Amit <[email protected]>
Date: Fri, 7 Jun 2019 15:11:44 -0700
Subject: [PATCH] x86/smp: Better check of allbutself

Avoid allocating and copying a temporary cpumask in
native_send_call_func_ipi() when checking whether the target mask is
"all online CPUs but self". Introduce for_each_cpu_and_not() for this
purpose.

Signed-off-by: Nadav Amit <[email protected]>
---
 arch/x86/kernel/smp.c             | 22 +++++++++++-----------
 include/asm-generic/bitops/find.h | 17 +++++++++++++++++
 include/linux/cpumask.h           | 17 +++++++++++++++++
 lib/cpumask.c                     | 20 ++++++++++++++++++++
 lib/find_bit.c                    | 21 ++++++++++++++++++---
 5 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 96421f97e75c..7972ab593397 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -136,23 +136,23 @@ void native_send_call_func_single_ipi(int cpu)
 
 void native_send_call_func_ipi(const struct cpumask *mask)
 {
-       cpumask_var_t allbutself;
-
-       if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
-               apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-               return;
+       int cpu, this_cpu = smp_processor_id();
+       bool allbutself = true;
+       bool self = false;
+
+       for_each_cpu_and_not(cpu, cpu_online_mask, mask) {
+               if (cpu != this_cpu) {
+                       allbutself = false;
+                       break;
+               }
+               self = true;
 	}
 
-       cpumask_copy(allbutself, cpu_online_mask);
-       __cpumask_clear_cpu(smp_processor_id(), allbutself);
-
-       if (cpumask_equal(mask, allbutself) &&
+       if (allbutself && self &&
            cpumask_equal(cpu_online_mask, cpu_callout_mask))
                apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
        else
                apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
-       free_cpumask_var(allbutself);
 }
 
 static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 8a1ee10014de..e5f19eec2737 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -32,6 +32,23 @@ extern unsigned long find_next_and_bit(const unsigned long *addr1,
 		unsigned long offset);
 #endif
 
+#ifndef find_next_and_not_bit
+/**
+ * find_next_and_not_bit - find the next bit which is set in the first
+ *	region and clear in the second
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Returns the bit number of the next matching bit.
+ * If no such bits exist, returns @size.
+ */
+extern unsigned long find_next_and_not_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset);
+#endif
+
 #ifndef find_next_zero_bit
 /**
  * find_next_zero_bit - find the next cleared bit in a memory region
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 693124900f0a..4648add54fad 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -229,6 +229,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
 }
 
 int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
+int __pure cpumask_next_and_not(int n, const struct cpumask *, const struct cpumask *);
 int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
 unsigned int cpumask_local_spread(unsigned int i, int node);
 
@@ -291,6 +292,22 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
 	for ((cpu) = -1;				\
 		(cpu) = cpumask_next_and((cpu), (mask), (and)),	\
 		(cpu) < nr_cpu_ids;)
+
+
+/**
+ * for_each_cpu_and_not - iterate over every cpu in @mask & ~@and_not
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the first cpumask pointer
+ * @and_not: the second cpumask pointer
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_and_not(cpu, mask, and_not)			\
+	for ((cpu) = -1;						\
+		(cpu) = cpumask_next_and_not((cpu), (mask), (and_not)),	\
+		(cpu) < nr_cpu_ids;)
+
+
 #endif /* SMP */
 
 #define CPU_BITS_NONE \
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 0cb672eb107c..59c98f55c308 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -42,6 +42,26 @@ int cpumask_next_and(int n, const struct cpumask *src1p,
 }
 EXPORT_SYMBOL(cpumask_next_and);
 
+/**
+ * cpumask_next_and_not - get the next cpu in *src1p & ~*src2p
+ * @n: the cpu prior to the place to search (ie. return will be > @n)
+ * @src1p: the first cpumask pointer
+ * @src2p: the second cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if no further cpus are set in *src1p and clear in *src2p.
+ */
+int cpumask_next_and_not(int n, const struct cpumask *src1p,
+			 const struct cpumask *src2p)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpumask_check(n);
+	return find_next_and_not_bit(cpumask_bits(src1p), cpumask_bits(src2p),
+		nr_cpumask_bits, n + 1);
+}
+EXPORT_SYMBOL(cpumask_next_and_not);
+
+
 /**
  * cpumask_any_but - return a "random" in a cpumask, but not this one.
  * @mask: the cpumask to search
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 5c51eb45178a..7bd2c567287e 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -23,7 +23,7 @@
 /*
  * This is a common helper function for find_next_bit, find_next_zero_bit, and
  * find_next_and_bit. The differences are:
- * - The "invert" argument, which is XORed with each fetched word before
+ * - The "invert" argument, which is XORed with each word fetched from addr1 before
  *   searching it for one bits.
  * - The optional "addr2", which is anded with "addr1" if present.
  */
@@ -37,9 +37,9 @@ static inline unsigned long _find_next_bit(const unsigned long *addr1,
 		return nbits;
 
 	tmp = addr1[start / BITS_PER_LONG];
+	tmp ^= invert;
 	if (addr2)
 		tmp &= addr2[start / BITS_PER_LONG];
-	tmp ^= invert;
 
 	/* Handle 1st word. */
 	tmp &= BITMAP_FIRST_WORD_MASK(start);
@@ -51,9 +51,9 @@ static inline unsigned long _find_next_bit(const unsigned long *addr1,
 			return nbits;
 
 		tmp = addr1[start / BITS_PER_LONG];
+		tmp ^= invert;
 		if (addr2)
 			tmp &= addr2[start / BITS_PER_LONG];
-		tmp ^= invert;
 	}
 
 	return min(start + __ffs(tmp), nbits);
@@ -91,6 +91,21 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
 EXPORT_SYMBOL(find_next_and_bit);
 #endif
 
+#if !defined(find_next_and_not_bit)
+unsigned long find_next_and_not_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset)
+{
+	/*
+	 * addr1 and addr2 are switched, since the first argument of
+	 * _find_next_bit() is the one that gets inverted.
+	 */
+	return _find_next_bit(addr2, addr1, size, offset, ~0UL);
+}
+EXPORT_SYMBOL(find_next_and_not_bit);
+#endif
+
+
 #ifndef find_first_bit
 /*
  * Find the first set bit in a memory region.
-- 
2.20.1
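FWIW, to convince myself that the detection above matches the old
cpumask_equal() test, I modeled it in userspace (standalone sketch, plain
bitmasks instead of cpumasks, 8 CPUs, not kernel code):

#include <assert.h>
#include <stdio.h>

/* Is mask exactly "all online CPUs except this_cpu"? */
static int is_allbutself(unsigned int online, unsigned int mask, int this_cpu)
{
        int cpu, allbutself = 1, self = 0;

        /* Walk the online CPUs which are not IPI targets. */
        for (cpu = 0; cpu < 8; cpu++) {
                if (!(online & (1u << cpu)) || (mask & (1u << cpu)))
                        continue;
                if (cpu != this_cpu) {
                        allbutself = 0;
                        break;
                }
                self = 1;
        }
        return allbutself && self;
}

int main(void)
{
        unsigned int online = 0x0f;     /* CPUs 0-3 online */

        /* Exactly all online CPUs except CPU 1: shorthand usable. */
        assert(is_allbutself(online, 0x0d, 1));
        /* CPU 3 is online but not targeted: shorthand would overshoot. */
        assert(!is_allbutself(online, 0x05, 1));
        /* Self is targeted too: shorthand would miss it. */
        assert(!is_allbutself(online, 0x0f, 1));
        printf("ok\n");
        return 0;
}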

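And to illustrate the addr1/addr2 switch in find_next_and_not_bit(): with
invert == ~0UL, _find_next_bit() now complements each fetched word of its
first bitmap *before* ANDing it with the second one (this is why the
"tmp ^= invert" has to move above the addr2 test), so passing (addr2, addr1)
makes it scan addr1 & ~addr2. A single-word userspace model, for
illustration only:

#include <stdio.h>

/* One-word model of _find_next_bit()'s masking after this patch. */
static unsigned long scan_word(unsigned long w1, unsigned long w2,
                               unsigned long invert)
{
        unsigned long tmp = w1;

        tmp ^= invert;  /* invert applies to the first bitmap only */
        tmp &= w2;      /* then restrict to the second bitmap */
        return tmp;
}

int main(void)
{
        unsigned long online = 0x0f;    /* CPUs 0-3 online */
        unsigned long mask   = 0x05;    /* IPI targets: CPUs 0 and 2 */

        /* find_next_and_not_bit(online, mask, ...): args switched, invert = ~0UL */
        printf("online & ~mask = %#lx\n", scan_word(mask, online, ~0UL));
        /* Prints 0xa: CPUs 1 and 3 are online and not targeted. */
        return 0;
}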
