[PATCH v4 1/2] nmi_backtrace: Allow excluding an arbitrary CPU
The APIs that allow backtracing across CPUs have always had a way to exclude the current CPU. This convenience means callers didn't need to find a place to allocate a CPU mask just to handle the common case. Let's extend the API to take a CPU ID to exclude instead of just a boolean. This isn't any more complex for the API to handle and allows the hardlockup detector to exclude a different CPU (the one it already did a trace for) without needing to find space for a CPU mask. Arguably, this new API also encourages safer behavior. Specifically if the caller wants to avoid tracing the current CPU (maybe because they already traced the current CPU) this makes it more obvious to the caller that they need to make sure that the current CPU ID can't change. Acked-by: Michal Hocko Signed-off-by: Douglas Anderson --- Changes in v4: - Renamed trigger_allbutself_cpu_backtrace() for when trigger is unsupported. Changes in v3: - ("nmi_backtrace: Allow excluding an arbitrary CPU") new for v3. arch/arm/include/asm/irq.h | 2 +- arch/arm/kernel/smp.c| 4 ++-- arch/loongarch/include/asm/irq.h | 2 +- arch/loongarch/kernel/process.c | 4 ++-- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/process.c | 4 ++-- arch/powerpc/include/asm/irq.h | 2 +- arch/powerpc/kernel/stacktrace.c | 4 ++-- arch/powerpc/kernel/watchdog.c | 4 ++-- arch/sparc/include/asm/irq_64.h | 2 +- arch/sparc/kernel/process_64.c | 6 +++--- arch/x86/include/asm/irq.h | 2 +- arch/x86/kernel/apic/hw_nmi.c| 4 ++-- include/linux/nmi.h | 14 +++--- kernel/watchdog.c| 2 +- lib/nmi_backtrace.c | 6 +++--- 16 files changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index 18605f1b3580..26c1d2ced4ce 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -32,7 +32,7 @@ void handle_IRQ(unsigned int, struct pt_regs *); #include extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self); + int exclude_cpu); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 6756203e45f3..3431c0553f45 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -846,7 +846,7 @@ static void raise_nmi(cpumask_t *mask) __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask); } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi); + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_nmi); } diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index a115e8999c69..218b4da0ea90 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -40,7 +40,7 @@ void spurious_interrupt(void); #define NR_IRQS_LEGACY 16 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace -void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); +void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu); #define MAX_IO_PICS 2 #define NR_IRQS(64 + (256 * MAX_IO_PICS)) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2e04eb07abb6..778e8d09953e 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -345,9 +345,9 @@ static void raise_backtrace(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void 
arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace); + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace); } #ifdef CONFIG_64BIT diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 75abfa834ab7..3a848e7e69f7 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -77,7 +77,7 @@ extern int cp0_fdc_irq; extern int get_c0_fdc_int(void); void arch_trigger_cpumask_backtrace(const struct cpumask *mask, - bool exclude_self); + int exclude_cpu); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif /* _ASM_IRQ_H */ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index a3225912c862..5387ed0a5186 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -750,9 +750,9 @@ static void raise_backtrace(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtr
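An aside on using the reworked API: a minimal caller sketch, hedged since it is not code from the patch. trigger_allbutcpu_cpu_backtrace() is assumed to be the renamed helper the v4 changelog above alludes to, and a negative exclude_cpu is assumed to mean "exclude nobody". The preempt_disable() bracket is the "make sure the current CPU ID can't change" point from the commit message:

	int this_cpu;

	/* Pin ourselves so smp_processor_id() stays valid for the call. */
	preempt_disable();
	this_cpu = smp_processor_id();
	/* We already traced this_cpu; now trace everyone else. */
	trigger_allbutcpu_cpu_backtrace(this_cpu);
	preempt_enable();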
[PATCH v3 1/2] nmi_backtrace: Allow excluding an arbitrary CPU
The APIs that allow backtracing across CPUs have always had a way to exclude the current CPU. This convenience means callers didn't need to find a place to allocate a CPU mask just to handle the common case. Let's extend the API to take a CPU ID to exclude instead of just a boolean. This isn't any more complex for the API to handle and allows the hardlockup detector to exclude a different CPU (the one it already did a trace for) without needing to find space for a CPU mask. Arguably, this new API also encourages safer behavior. Specifically if the caller wants to avoid tracing the current CPU (maybe because they already traced the current CPU) this makes it more obvious to the caller that they need to make sure that the current CPU ID can't change. Signed-off-by: Douglas Anderson --- Changes in v3: - ("nmi_backtrace: Allow excluding an arbitrary CPU") new for v3. arch/arm/include/asm/irq.h | 2 +- arch/arm/kernel/smp.c| 4 ++-- arch/loongarch/include/asm/irq.h | 2 +- arch/loongarch/kernel/process.c | 4 ++-- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/process.c | 4 ++-- arch/powerpc/include/asm/irq.h | 2 +- arch/powerpc/kernel/stacktrace.c | 4 ++-- arch/powerpc/kernel/watchdog.c | 4 ++-- arch/sparc/include/asm/irq_64.h | 2 +- arch/sparc/kernel/process_64.c | 6 +++--- arch/x86/include/asm/irq.h | 2 +- arch/x86/kernel/apic/hw_nmi.c| 4 ++-- include/linux/nmi.h | 12 ++-- kernel/watchdog.c| 2 +- lib/nmi_backtrace.c | 6 +++--- 16 files changed, 31 insertions(+), 31 deletions(-) diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index 18605f1b3580..26c1d2ced4ce 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -32,7 +32,7 @@ void handle_IRQ(unsigned int, struct pt_regs *); #include extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self); + int exclude_cpu); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 6756203e45f3..3431c0553f45 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -846,7 +846,7 @@ static void raise_nmi(cpumask_t *mask) __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask); } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi); + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_nmi); } diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index a115e8999c69..218b4da0ea90 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -40,7 +40,7 @@ void spurious_interrupt(void); #define NR_IRQS_LEGACY 16 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace -void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); +void arch_trigger_cpumask_backtrace(const struct cpumask *mask, int exclude_cpu); #define MAX_IO_PICS 2 #define NR_IRQS(64 + (256 * MAX_IO_PICS)) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2e04eb07abb6..778e8d09953e 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -345,9 +345,9 @@ static void raise_backtrace(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, 
raise_backtrace); + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace); } #ifdef CONFIG_64BIT diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 75abfa834ab7..3a848e7e69f7 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -77,7 +77,7 @@ extern int cp0_fdc_irq; extern int get_c0_fdc_int(void); void arch_trigger_cpumask_backtrace(const struct cpumask *mask, - bool exclude_self); + int exclude_cpu); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif /* _ASM_IRQ_H */ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index a3225912c862..5387ed0a5186 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -750,9 +750,9 @@ static void raise_backtrace(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask,
[PATCH] powerpc: Include asm/nmi.h in mobility.c for watchdog_hardlockup_set_timeout_pct()
arch/powerpc/platforms/pseries/mobility.c calls watchdog_hardlockup_set_timeout_pct(), which is declared in <asm/nmi.h>. We used to automatically get <asm/nmi.h> included, but that changed as of commit 7ca8fe94aa92 ("watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH"). Let's add the explicit include.

Reported-by: Randy Dunlap
Closes: https://lore.kernel.org/r/af19b76d-aa4b-6c88-9cac-eae4b2072...@infradead.org
Fixes: 7ca8fe94aa92 ("watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH")
Signed-off-by: Douglas Anderson
---
 arch/powerpc/platforms/pseries/mobility.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index cd632ba9ebff..0161226d8fec 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -24,6 +24,7 @@
 #include
 #include
+#include <asm/nmi.h>
 #include
 #include "pseries.h"
 #include "vas.h" /* vas_migration_handler() */
--
2.41.0.255.g8b1d071c50-goog
[PATCH v2] powerpc: Move arch_trigger_cpumask_backtrace from nmi.h to irq.h
The powerpc architecture was the only one that defined arch_trigger_cpumask_backtrace() in asm/nmi.h instead of asm/irq.h. Move it to be consistent.

This fixes compile time errors introduced by commit 7ca8fe94aa92 ("watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH"). That commit caused <asm/nmi.h> to stop being included if the hardlockup detector wasn't enabled. The specific errors were:

  error: implicit declaration of function ‘nmi_cpu_backtrace’
  error: implicit declaration of function ‘nmi_trigger_cpumask_backtrace’

NOTE: when moving this into irq.h, we also change the guards from just checking if "CONFIG_NMI_IPI" is defined to also checking if "CONFIG_PPC_BOOK3S_64" is defined. This matches the code in arch/powerpc/kernel/stacktrace.c. Previously this worked because <asm/nmi.h> was included if "CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH" was defined. For powerpc that's only selected if "CONFIG_PPC_BOOK3S_64" is defined.

Fixes: 7ca8fe94aa92 ("watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH")
Reported-by: Michael Ellerman
Closes: https://lore.kernel.org/r/871qi5otdh.fsf@mail.lhotse
Signed-off-by: Douglas Anderson
---
I'd expect that this would land in Andrew Morton's tree along with the other lockup detector stuff.

Changes in v2:
- Change the guards to include CONFIG_PPC_BOOK3S_64.

 arch/powerpc/include/asm/irq.h | 6 ++++++
 arch/powerpc/include/asm/nmi.h | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 94dffa1dd223..f257cacb49a9 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -53,5 +53,11 @@ void __do_IRQ(struct pt_regs *regs);

 int irq_choose_cpu(const struct cpumask *mask);

+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+					   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+#endif
+
 #endif /* _ASM_IRQ_H */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index ce25318c3902..49a75340c3e0 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -9,12 +9,6 @@ void watchdog_hardlockup_set_timeout_pct(u64 pct);
 static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {}
 #endif

-#ifdef CONFIG_NMI_IPI
-extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
-					   bool exclude_self);
-#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
-#endif
-
 extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs);

 #endif /* _ASM_NMI_H */
--
2.41.0.162.gfafddb0af9-goog
[PATCH] powerpc: Move arch_trigger_cpumask_backtrace from nmi.h to irq.h
The powerpc architecture was the only one that defined arch_trigger_cpumask_backtrace() in asm/nmi.h instead of asm/irq.h. Move it to be consistent.

This fixes compile time errors introduced by commit 7ca8fe94aa92 ("watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH"). That commit caused <asm/nmi.h> to stop being included if the hardlockup detector wasn't enabled. The specific errors were:

  error: implicit declaration of function ‘nmi_cpu_backtrace’
  error: implicit declaration of function ‘nmi_trigger_cpumask_backtrace’

Fixes: 7ca8fe94aa92 ("watchdog/hardlockup: define HARDLOCKUP_DETECTOR_ARCH")
Reported-by: Michael Ellerman
Closes: https://lore.kernel.org/r/871qi5otdh.fsf@mail.lhotse
Signed-off-by: Douglas Anderson
---
I'd expect that this would land in Andrew Morton's tree along with the other lockup detector stuff.

 arch/powerpc/include/asm/irq.h | 6 ++++++
 arch/powerpc/include/asm/nmi.h | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 94dffa1dd223..f7a90b6f3ceb 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -53,5 +53,11 @@ void __do_IRQ(struct pt_regs *regs);

 int irq_choose_cpu(const struct cpumask *mask);

+#ifdef CONFIG_NMI_IPI
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+					   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+#endif
+
 #endif /* _ASM_IRQ_H */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index ce25318c3902..49a75340c3e0 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -9,12 +9,6 @@ void watchdog_hardlockup_set_timeout_pct(u64 pct);
 static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {}
 #endif

-#ifdef CONFIG_NMI_IPI
-extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
-					   bool exclude_self);
-#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
-#endif
-
 extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs);

 #endif /* _ASM_NMI_H */
--
2.41.0.162.gfafddb0af9-goog
[PATCH 10/10] watchdog/hardlockup: Rename HAVE_HARDLOCKUP_DETECTOR_NON_ARCH to ..._PERF_OR_BUDDY
HAVE_HARDLOCKUP_DETECTOR_NON_ARCH is a mouthful and confusing. HAVE_HARDLOCKUP_DETECTOR_PERF_OR_BUDDY is even more of a mouthful, but probably less confusing. Rename the Kconfig names. Signed-off-by: Douglas Anderson --- lib/Kconfig.debug | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index eb1edd5905bc..b9e162698a82 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1058,7 +1058,7 @@ config HARDLOCKUP_DETECTOR_BUDDY # needs SMP). In either case, using the "non-arch" code conflicts with # the NMI watchdog code (which is sometimes used directly and sometimes used # by the arch-provided hardlockup detector). -config HAVE_HARDLOCKUP_DETECTOR_NON_ARCH +config HAVE_HARDLOCKUP_DETECTOR_PERF_OR_BUDDY bool depends on (HAVE_HARDLOCKUP_DETECTOR_PERF || SMP) && !HAVE_NMI_WATCHDOG default y @@ -1077,10 +1077,10 @@ config HARDLOCKUP_DETECTOR_PREFER_BUDDY an arch-specific hardlockup detector or if resources needed for the hardlockup detector are better used for other things. -# This will select the appropriate non-arch hardlockdup detector -config HARDLOCKUP_DETECTOR_NON_ARCH +# This will select the appropriate non-arch hardlockup detector +config HARDLOCKUP_DETECTOR_PERF_OR_BUDDY bool - depends on HAVE_HARDLOCKUP_DETECTOR_NON_ARCH + depends on HAVE_HARDLOCKUP_DETECTOR_PERF_OR_BUDDY select HARDLOCKUP_DETECTOR_BUDDY if !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY @@ -1098,9 +1098,9 @@ config HARDLOCKUP_CHECK_TIMESTAMP config HARDLOCKUP_DETECTOR bool "Detect Hard Lockups" depends on DEBUG_KERNEL && !S390 - depends on HAVE_HARDLOCKUP_DETECTOR_NON_ARCH || HAVE_HARDLOCKUP_DETECTOR_ARCH + depends on HAVE_HARDLOCKUP_DETECTOR_PERF_OR_BUDDY || HAVE_HARDLOCKUP_DETECTOR_ARCH select LOCKUP_DETECTOR - select HARDLOCKUP_DETECTOR_NON_ARCH if HAVE_HARDLOCKUP_DETECTOR_NON_ARCH + select HARDLOCKUP_DETECTOR_PERF_OR_BUDDY if HAVE_HARDLOCKUP_DETECTOR_PERF_OR_BUDDY help Say Y here to enable the kernel to act as a watchdog to detect -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 09/10] watchdog/hardlockup: Move SMP barriers from common code to buddy code
It's been suggested that since the SMP barriers are only potentially useful for the buddy hardlockup detector, not the perf hardlockup detector, that the barriers belong in the buddy code. Let's move them and add clearer comments about why they're needed. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- kernel/watchdog.c | 6 -- kernel/watchdog_buddy.c | 21 + 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6cc46b8e3d07..a351ab0c35eb 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -109,9 +109,6 @@ EXPORT_SYMBOL(arch_touch_nmi_watchdog); void watchdog_hardlockup_touch_cpu(unsigned int cpu) { per_cpu(watchdog_hardlockup_touched, cpu) = true; - - /* Match with smp_rmb() in watchdog_hardlockup_check() */ - smp_wmb(); } static bool is_hardlockup(unsigned int cpu) @@ -141,9 +138,6 @@ static void watchdog_hardlockup_kick(void) void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) { - /* Match with smp_wmb() in watchdog_hardlockup_touch_cpu() */ - smp_rmb(); - if (per_cpu(watchdog_hardlockup_touched, cpu)) { per_cpu(watchdog_hardlockup_touched, cpu) = false; return; diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c index 2ef88722c5e7..34dbfe091f4b 100644 --- a/kernel/watchdog_buddy.c +++ b/kernel/watchdog_buddy.c @@ -51,6 +51,13 @@ void watchdog_hardlockup_enable(unsigned int cpu) if (next_cpu < nr_cpu_ids) watchdog_hardlockup_touch_cpu(next_cpu); + /* +* Makes sure that watchdog is touched on this CPU before +* other CPUs could see it in watchdog_cpus. The counter +* part is in watchdog_buddy_check_hardlockup(). +*/ + smp_wmb(); + cpumask_set_cpu(cpu, &watchdog_cpus); } @@ -68,6 +75,13 @@ void watchdog_hardlockup_disable(unsigned int cpu) if (next_cpu < nr_cpu_ids) watchdog_hardlockup_touch_cpu(next_cpu); + /* +* Makes sure that watchdog is touched on the next CPU before +* this CPU disappear in watchdog_cpus. The counter part is in +* watchdog_buddy_check_hardlockup(). +*/ + smp_wmb(); + cpumask_clear_cpu(cpu, &watchdog_cpus); } @@ -88,5 +102,12 @@ void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) if (next_cpu >= nr_cpu_ids) return; + /* +* Make sure that the watchdog was touched on next CPU when +* watchdog_next_cpu() returned another one because of +* a change in watchdog_hardlockup_enable()/disable(). +*/ + smp_rmb(); + watchdog_hardlockup_check(next_cpu, NULL); } -- 2.41.0.rc0.172.g3f132b7071-goog
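For readers less familiar with the pairing those new comments describe, here is a minimal generic sketch (not the watchdog code itself; data, published, and use() are stand-in names) of why each smp_wmb() needs a matching smp_rmb() on the other side:

	/* CPU A: publish the data, then advertise it */
	WRITE_ONCE(data, 1);		/* e.g. the per-CPU "touched" flag */
	smp_wmb();			/* order data store before flag store */
	WRITE_ONCE(published, true);	/* e.g. the bit in watchdog_cpus */

	/* CPU B: see the advertisement, then consume the data */
	if (READ_ONCE(published)) {
		smp_rmb();		/* order flag load before data load */
		use(READ_ONCE(data));	/* guaranteed to observe data == 1 */
	}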
[PATCH 08/10] watchdog/buddy: Simplify the dependency for HARDLOCKUP_DETECTOR_PREFER_BUDDY
The dependency for HARDLOCKUP_DETECTOR_PREFER_BUDDY was more complicated than it needed to be. If the "perf" detector is available and we have SMP then we have a choice, so enable the config based on just those two config items. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c9df93402237..eb1edd5905bc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1065,7 +1065,7 @@ config HAVE_HARDLOCKUP_DETECTOR_NON_ARCH config HARDLOCKUP_DETECTOR_PREFER_BUDDY bool "Prefer the buddy CPU hardlockup detector" - depends on HAVE_HARDLOCKUP_DETECTOR_NON_ARCH && HAVE_HARDLOCKUP_DETECTOR_PERF && SMP + depends on HAVE_HARDLOCKUP_DETECTOR_PERF && SMP help Say Y here to prefer the buddy hardlockup detector over the perf one. -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 07/10] watchdog/buddy: Don't copy the cpumask in watchdog_next_cpu()
There's no reason to make a copy of the "watchdog_cpus" locally in watchdog_next_cpu(). Making a copy wouldn't make things any more race free and we're just reading the value so there's no need for a copy. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- kernel/watchdog_buddy.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c index 3ffc5f2ede5a..2ef88722c5e7 100644 --- a/kernel/watchdog_buddy.c +++ b/kernel/watchdog_buddy.c @@ -10,12 +10,11 @@ static cpumask_t __read_mostly watchdog_cpus; static unsigned int watchdog_next_cpu(unsigned int cpu) { - cpumask_t cpus = watchdog_cpus; unsigned int next_cpu; - next_cpu = cpumask_next(cpu, &cpus); + next_cpu = cpumask_next(cpu, &watchdog_cpus); if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(&cpus); + next_cpu = cpumask_first(&watchdog_cpus); if (next_cpu == cpu) return nr_cpu_ids; -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 06/10] watchdog/buddy: Cleanup how watchdog_buddy_check_hardlockup() is called
In the patch ("watchdog/hardlockup: detect hard lockups using secondary (buddy) CPUs"), we added a call from the common watchdog.c file into the buddy. That call could be done more cleanly. Specifically: 1. If we move the call into watchdog_hardlockup_kick() then it keeps watchdog_timer_fn() simpler. 2. We don't need to pass an "unsigned long" to the buddy for the timer count. In the patch ("watchdog/hardlockup: add a "cpu" param to watchdog_hardlockup_check()") the count was changed to "atomic_t" which is backed by an int, so we should match types. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- include/linux/nmi.h | 4 ++-- kernel/watchdog.c | 15 +++ kernel/watchdog_buddy.c | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 6c6a5ce77c66..43893e5f858a 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -114,9 +114,9 @@ void watchdog_hardlockup_disable(unsigned int cpu); void lockup_detector_reconfigure(void); #ifdef CONFIG_HARDLOCKUP_DETECTOR_BUDDY -void watchdog_buddy_check_hardlockup(unsigned long hrtimer_interrupts); +void watchdog_buddy_check_hardlockup(int hrtimer_interrupts); #else -static inline void watchdog_buddy_check_hardlockup(unsigned long hrtimer_interrupts) {} +static inline void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) {} #endif /** diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 85f4839b6faf..6cc46b8e3d07 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -131,9 +131,12 @@ static bool is_hardlockup(unsigned int cpu) return false; } -static unsigned long watchdog_hardlockup_kick(void) +static void watchdog_hardlockup_kick(void) { - return atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts)); + int new_interrupts; + + new_interrupts = atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts)); + watchdog_buddy_check_hardlockup(new_interrupts); } void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) @@ -195,7 +198,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) #else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ -static inline unsigned long watchdog_hardlockup_kick(void) { return 0; } +static inline void watchdog_hardlockup_kick(void) { } #endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ @@ -449,15 +452,11 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) struct pt_regs *regs = get_irq_regs(); int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; - unsigned long hrtimer_interrupts; if (!watchdog_enabled) return HRTIMER_NORESTART; - hrtimer_interrupts = watchdog_hardlockup_kick(); - - /* test for hardlockups */ - watchdog_buddy_check_hardlockup(hrtimer_interrupts); + watchdog_hardlockup_kick(); /* kick the softlockup detector */ if (completion_done(this_cpu_ptr(&softlockup_completion))) { diff --git a/kernel/watchdog_buddy.c b/kernel/watchdog_buddy.c index fee45af2e5bd..3ffc5f2ede5a 100644 --- a/kernel/watchdog_buddy.c +++ b/kernel/watchdog_buddy.c @@ -72,7 +72,7 @@ void watchdog_hardlockup_disable(unsigned int cpu) cpumask_clear_cpu(cpu, &watchdog_cpus); } -void watchdog_buddy_check_hardlockup(unsigned long hrtimer_interrupts) +void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) { unsigned int next_cpu; -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 05/10] watchdog/hardlockup: remove softlockup comment in touch_nmi_watchdog()
In the patch ("watchdog/hardlockup: add comments to touch_nmi_watchdog()") we adjusted some comments for touch_nmi_watchdog(). The comment about the softlockup had a typo and were also felt to be too obvious. Remove it. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- include/linux/nmi.h | 4 1 file changed, 4 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 3a27169ec383..6c6a5ce77c66 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -140,10 +140,6 @@ static inline void touch_nmi_watchdog(void) */ arch_touch_nmi_watchdog(); - /* -* Touching the hardlock detector implcitily pets the -* softlockup detector too -*/ touch_softlockup_watchdog(); } -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 04/10] watchdog/hardlockup: In watchdog_hardlockup_check() use cpumask_copy()
In the patch ("watchdog/hardlockup: add a "cpu" param to watchdog_hardlockup_check()") we started using a cpumask to keep track of which CPUs to backtrace. When setting up this cpumask, it's better to use cpumask_copy() than to just copy the structure directly. Fix this. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- kernel/watchdog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 32dac8028753..85f4839b6faf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -154,7 +154,9 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) */ if (is_hardlockup(cpu)) { unsigned int this_cpu = smp_processor_id(); - struct cpumask backtrace_mask = *cpu_online_mask; + struct cpumask backtrace_mask; + + cpumask_copy(&backtrace_mask, cpu_online_mask); /* Only print hardlockups once. */ if (per_cpu(watchdog_hardlockup_warned, cpu)) -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 03/10] watchdog/hardlockup: Don't use raw_cpu_ptr() in watchdog_hardlockup_kick()
In the patch ("watchdog/hardlockup: add a "cpu" param to watchdog_hardlockup_check()") there was no reason to use raw_cpu_ptr(). Using this_cpu_ptr() works fine. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 62230f5b8878..32dac8028753 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -133,7 +133,7 @@ static bool is_hardlockup(unsigned int cpu) static unsigned long watchdog_hardlockup_kick(void) { - return atomic_inc_return(raw_cpu_ptr(&hrtimer_interrupts)); + return atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts)); } void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 00/10] watchdog: Cleanup / fixes after buddy series v5 reviews
This patch series attempts to finish resolving the feedback received from Petr Mladek on the v5 series I posted. Andrew has already landed v5 so I'm posting this as additional patches. Probably the only thing that wasn't fully as clean as Petr requested was the Kconfig stuff. I couldn't find a better way to express it without a more major overhaul. In the very least, I renamed "NON_ARCH" to "PERF_OR_BUDDY" in the hopes that will make it marginally better. Nothing in this series is terribly critical and even the bugfixes are small. However, it does cleanup a few things that were pointed out in review. Douglas Anderson (10): watchdog/hardlockup: Keep kernel.nmi_watchdog sysctl as 0444 if probe fails watchdog/hardlockup: HAVE_NMI_WATCHDOG must implement watchdog_hardlockup_probe() watchdog/hardlockup: Don't use raw_cpu_ptr() in watchdog_hardlockup_kick() watchdog/hardlockup: In watchdog_hardlockup_check() use cpumask_copy() watchdog/hardlockup: remove softlockup comment in touch_nmi_watchdog() watchdog/buddy: Cleanup how watchdog_buddy_check_hardlockup() is called watchdog/buddy: Don't copy the cpumask in watchdog_next_cpu() watchdog/buddy: Simplify the dependency for HARDLOCKUP_DETECTOR_PREFER_BUDDY watchdog/hardlockup: Move SMP barriers from common code to buddy code watchdog/hardlockup: Rename HAVE_HARDLOCKUP_DETECTOR_NON_ARCH to ..._PERF_OR_BUDDY arch/Kconfig| 3 +- arch/sparc/kernel/nmi.c | 5 +++ include/linux/nmi.h | 14 ++--- kernel/watchdog.c | 68 ++--- kernel/watchdog_buddy.c | 28 ++--- lib/Kconfig.debug | 14 - 6 files changed, 70 insertions(+), 62 deletions(-) -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 01/10] watchdog/hardlockup: Keep kernel.nmi_watchdog sysctl as 0444 if probe fails
The permissions for the kernel.nmi_watchdog sysctl have always been set at compile time despite the fact that a watchdog can fail to probe. Let's fix this and set the permissions based on whether the hardlockup detector actually probed. Fixes: a994a3147e4c ("watchdog/hardlockup/perf: Implement init time detection of perf") Reported-by: Petr Mladek Closes: https://lore.kernel.org/r/ZHCn4hNxFpY5-9Ki@alley Signed-off-by: Douglas Anderson --- include/linux/nmi.h | 6 -- kernel/watchdog.c | 30 -- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 333465e235e1..3a27169ec383 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -95,12 +95,6 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); static inline void arch_touch_nmi_watchdog(void) { } #endif -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) -# define NMI_WATCHDOG_SYSCTL_PERM 0644 -#else -# define NMI_WATCHDOG_SYSCTL_PERM 0444 -#endif - #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 237990e8d345..4b9e31edb47f 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -880,15 +880,6 @@ static struct ctl_table watchdog_sysctls[] = { .extra1 = SYSCTL_ZERO, .extra2 = (void *)&sixty, }, - { - .procname = "nmi_watchdog", - .data = &watchdog_hardlockup_user_enabled, - .maxlen = sizeof(int), - .mode = NMI_WATCHDOG_SYSCTL_PERM, - .proc_handler = proc_nmi_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, { .procname = "watchdog_cpumask", .data = &watchdog_cpumask_bits, @@ -952,10 +943,28 @@ static struct ctl_table watchdog_sysctls[] = { {} }; +static struct ctl_table watchdog_hardlockup_sysctl[] = { + { + .procname = "nmi_watchdog", + .data = &watchdog_hardlockup_user_enabled, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_nmi_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + static void __init watchdog_sysctl_init(void) { register_sysctl_init("kernel", watchdog_sysctls); + + if (watchdog_hardlockup_available) + watchdog_hardlockup_sysctl[0].mode = 0644; + register_sysctl_init("kernel", watchdog_hardlockup_sysctl); } + #else #define watchdog_sysctl_init() do { } while (0) #endif /* CONFIG_SYSCTL */ @@ -1011,6 +1020,8 @@ static int __init lockup_detector_check(void) /* Make sure no work is pending. */ flush_work(&detector_work); + watchdog_sysctl_init(); + return 0; } @@ -1030,5 +1041,4 @@ void __init lockup_detector_init(void) allow_lockup_detector_init_retry = true; lockup_detector_setup(); - watchdog_sysctl_init(); } -- 2.41.0.rc0.172.g3f132b7071-goog
[PATCH 02/10] watchdog/hardlockup: HAVE_NMI_WATCHDOG must implement watchdog_hardlockup_probe()
Right now there is one arch (sparc64) that selects HAVE_NMI_WATCHDOG without selecting HAVE_HARDLOCKUP_DETECTOR_ARCH. Because of that one architecture, we have some special case code in the watchdog core to handle the fact that watchdog_hardlockup_probe() isn't implemented. Let's implement watchdog_hardlockup_probe() for sparc64 and get rid of the special case. As a side effect of doing this, code inspection tells us that we could fix a minor bug where the system won't properly realize that NMI watchdogs are disabled. Specifically, on powerpc if CONFIG_PPC_WATCHDOG is turned off the arch might still select CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH which selects CONFIG_HAVE_NMI_WATCHDOG. Since CONFIG_PPC_WATCHDOG was off then nothing will override the "weak" watchdog_hardlockup_probe() and we'll fallback to looking at CONFIG_HAVE_NMI_WATCHDOG. Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- Though this does fix a minor bug, I didn't mark this as "Fixes" because it's super minor. One could also argue that this wasn't a bug at all but simply was never an implemented feature. The code that added some amount of dynamicness here was commit a994a3147e4c ("watchdog/hardlockup/perf: Implement init time detection of perf") which, as per the title, was only intending to make "perf" dynamic. The old NMI watchdog presumably has never been handled dynamically. arch/Kconfig| 3 ++- arch/sparc/kernel/nmi.c | 5 + kernel/watchdog.c | 13 - 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 64d771855ecd..b4f6387b12fe 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -428,7 +428,8 @@ config HAVE_NMI_WATCHDOG bool help The arch provides a low level NMI watchdog. It provides - asm/nmi.h, and defines its own arch_touch_nmi_watchdog(). + asm/nmi.h, and defines its own watchdog_hardlockup_probe() and + arch_touch_nmi_watchdog(). config HAVE_HARDLOCKUP_DETECTOR_ARCH bool diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 9d9e29b75c43..17cdfdbf1f3b 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -65,6 +65,11 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +int __init watchdog_hardlockup_probe(void) +{ + return 0; +} + static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) { int this_cpu = smp_processor_id(); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4b9e31edb47f..62230f5b8878 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -217,19 +217,6 @@ void __weak watchdog_hardlockup_disable(unsigned int cpu) { } */ int __weak __init watchdog_hardlockup_probe(void) { - /* -* If CONFIG_HAVE_NMI_WATCHDOG is defined then an architecture -* is assumed to have the hard watchdog available and we return 0. -*/ - if (IS_ENABLED(CONFIG_HAVE_NMI_WATCHDOG)) - return 0; - - /* -* Hardlockup detectors other than those using CONFIG_HAVE_NMI_WATCHDOG -* are required to implement a non-weak version of this probe function -* to tell whether they are available. If they don't override then -* we'll return -ENODEV. -*/ return -ENODEV; } -- 2.41.0.rc0.172.g3f132b7071-goog
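The link-time behavior this cleanup leans on, as a generic sketch with hypothetical names (my_probe() is illustrative, not a kernel symbol): a __weak definition is only a fallback, and any ordinary ("strong") definition of the same symbol silently replaces it when the kernel is linked:

	/* common code: fallback used when nobody defines the symbol */
	int __weak my_probe(void)
	{
		return -ENODEV;		/* "no hardlockup detector here" */
	}

	/* arch or detector code: the strong definition wins at link time */
	int my_probe(void)
	{
		return 0;		/* "detector is available" */
	}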
[PATCH v5 17/18] arm64: add hw_nmi_get_sample_period for preparation of lockup detector
From: Lecopzer Chen

Set the safe maximum CPU frequency to 5 GHz in case a particular platform doesn't implement a cpufreq driver. Although the architecture doesn't put any restrictions on the maximum frequency, 5 GHz seems to be a safe maximum given the available Arm CPUs in the market, which are clocked much less than 5 GHz. On the other hand, we can't make it much higher, as it would lead to a large hard-lockup detection timeout on parts which are running slower (eg. 1 GHz on Developerbox) and don't possess a cpufreq driver.

Co-developed-by: Sumit Garg
Signed-off-by: Sumit Garg
Co-developed-by: Pingfan Liu
Signed-off-by: Pingfan Liu
Signed-off-by: Lecopzer Chen
Signed-off-by: Douglas Anderson
---
I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. The only change I made here was to remove an extra blank line that checkpatch complained about.

As mentioned in the cover letter, I'm not really expecting this patch to land together with the patches for the buddy detector. I included it with my series simply for convenience of testing both series together.

NOTE: the previous patch posted by Lecopzer pointed to Sumit's patch [2] in the commit text but provided no context. I moved it to this "after the cut" note.

[1] https://lore.kernel.org/r/20220903093415.15850-6-lecopzer.c...@mediatek.com/
[2] http://lore.kernel.org/linux-arm-kernel/1610712101-14929-1-git-send-email-sumit.g...@linaro.org

(no changes since v4)

Changes in v4:
- Pulled ("add hw_nmi_get_sample_period for ...") into my series for v4.

 arch/arm64/kernel/Makefile       |  1 +
 arch/arm64/kernel/watchdog_hld.c | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 arch/arm64/kernel/watchdog_hld.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7c2bb4e72476..cc22011ab66a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 obj-$(CONFIG_MODULES)			+= module.o
 obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o perf_callchain.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF)	+= watchdog_hld.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_CPU_PM)			+= sleep.o suspend.o
 obj-$(CONFIG_CPU_IDLE)			+= cpuidle.o
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
new file mode 100644
index 000000000000..2401eb1b7e55
--- /dev/null
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpufreq.h>
+
+/*
+ * Safe maximum CPU frequency in case a particular platform doesn't implement
+ * cpufreq driver. Although, architecture doesn't put any restrictions on
+ * maximum frequency but 5 GHz seems to be safe maximum given the available
+ * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
+ * hand, we can't make it much higher as it would lead to a large hard-lockup
+ * detection timeout on parts which are running slower (eg. 1GHz on
+ * Developerbox) and doesn't possess a cpufreq driver.
+ */
+#define SAFE_MAX_CPU_FREQ	5000000000UL // 5 GHz
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+	unsigned int cpu = smp_processor_id();
+	unsigned long max_cpu_freq;
+
+	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+	if (!max_cpu_freq)
+		max_cpu_freq = SAFE_MAX_CPU_FREQ;
+
+	return (u64)max_cpu_freq * watchdog_thresh;
+}
--
2.40.1.698.g37aff9b760-goog
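To make the returned value concrete, a worked example (assuming the default watchdog_thresh of 10 seconds on a platform with no cpufreq driver, so the 5 GHz fallback applies):

	/* 5,000,000,000 cycles/sec * 10 sec = 50,000,000,000 cycles */
	u64 period = SAFE_MAX_CPU_FREQ * 10;

That is, at the assumed maximum clock rate the PMU overflow interrupt fires at most once per watchdog_thresh seconds; parts actually running slower see a proportionally longer detection timeout, which is why the fallback shouldn't be set too high.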
[PATCH v5 16/18] watchdog/perf: Adapt the watchdog_perf interface for async model
From: Lecopzer Chen

When lockup_detector_init() calls watchdog_hardlockup_probe(), the PMU may not be ready yet. E.g. on arm64, the PMU is not ready until device_initcall(armv8_pmu_driver_init). It is deeply integrated with the driver model and cpuhp, so it is hard to push this initialization before smp_init(). However, it is easy to take the opposite approach and try to initialize the watchdog once again later.

The delayed probe is called using workqueues: it needs to allocate memory and must proceed in a normal context. The delayed probe can be used whenever watchdog_hardlockup_probe() returns non-zero, which is the return code when the PMU is not ready yet.

Provide an API, lockup_detector_retry_init(), for anyone who needs to retry initializing the lockup detector after failing at lockup_detector_init(). The original assumption is that nobody should use the delayed probe after lockup_detector_check(), which has the __init attribute. That is, anyone who uses this API must call it between lockup_detector_init() and lockup_detector_check(), and the caller must have the __init attribute.

Reviewed-by: Petr Mladek
Co-developed-by: Pingfan Liu
Signed-off-by: Pingfan Liu
Signed-off-by: Lecopzer Chen
Suggested-by: Petr Mladek
Signed-off-by: Douglas Anderson
---
I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together.

As part of making this match with my series, I resolved a few conflicts and did a few renames. I also fixed the kernel test robot yell [2] by providing a dummy implementation of the retry function if CONFIG_LOCKUP_DETECTOR wasn't defined. That led me to move the definition of the function and sanitize the name of the function to match others around it.

This patch makes less sense without the patches to add support for the arm64 perf hardlockup detector. Thus, I've put it at the end of the series.

I removed the "kernel test robot" tag since that didn't really make sense. Presumably the robot found a problem on a previous version of the patch, but that's not normally a reason to add the Reported-by.

[1] https://lore.kernel.org/r/20220903093415.15850-5-lecopzer.c...@mediatek.com/
[2] https://lore.kernel.org/r/202209050639.jdawd49e-...@intel.com/

(no changes since v4)

Changes in v4:
- Pulled ("Adapt the watchdog_hld interface ...") into my series for v4.
include/linux/nmi.h | 2 ++ kernel/watchdog.c | 67 - 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index eb616fc07c85..d23902a2fd49 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -13,6 +13,7 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_retry_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); @@ -34,6 +35,7 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } +static inline void lockup_detector_retry_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4110f7ca23a5..877d8670f26e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -208,7 +208,13 @@ void __weak watchdog_hardlockup_enable(unsigned int cpu) { } void __weak watchdog_hardlockup_disable(unsigned int cpu) { } -/* Return 0, if a hardlockup watchdog is available. Error code otherwise */ +/* + * Watchdog-detector specific API. + * + * Return 0 when hardlockup watchdog is available, negative value otherwise. + * Note that the negative value means that a delayed probe might + * succeed later. + */ int __weak __init watchdog_hardlockup_probe(void) { /* @@ -954,6 +960,62 @@ static void __init watchdog_sysctl_init(void) #define watchdog_sysctl_init() do { } while (0) #endif /* CONFIG_SYSCTL */ +static void __init lockup_detector_delay_init(struct work_struct *work); +static bool allow_lockup_detector_init_retry __initdata; + +static struct work_struct detector_work __initdata = + __WORK_INITIALIZER(detector_work, lockup_detector_delay_init); + +static void __init lockup_detector_delay_init(struct work_struct *work) +{ + int ret; + + ret = watchdog_hardlockup_probe(); + if (ret) { + pr_info("Delayed init of the lockup detector failed: %d\n", ret); + pr_info("Hard watchdog permanently disabled\n"); + return; + } + + allow_lockup_detector_init_retry = false; + + watchdog_hardlockup_available = true; + lockup_detector_setup(); +} + +/* + * lockup_detector_retry_init - retry init lockup detector if poss
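A hedged sketch of the intended caller: the arm64 PMU driver from the follow-up series is the motivating user, so the function name below is illustrative of that later work rather than something this patch adds:

	static int __init armv8_pmu_driver_init(void)
	{
		/* ... register the PMU driver with the perf core ... */

		/*
		 * The PMU is usable now; ask the watchdog core to re-run
		 * watchdog_hardlockup_probe() from its workqueue.
		 */
		lockup_detector_retry_init();
		return 0;
	}
	device_initcall(armv8_pmu_driver_init);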
[PATCH v5 15/18] watchdog/perf: Add a weak function for an arch to detect if perf can use NMIs
On arm64, NMI support needs to be detected at runtime. Add a weak function to the perf hardlockup detector so that an architecture can implement it to detect whether NMIs are available. Signed-off-by: Douglas Anderson --- While I won't object to this patch landing, I consider it part of the arm64 perf hardlockup effort. I would be OK with the earlier patches in the series landing and then not landing ${SUBJECT} patch nor anything else later. I'll also note that, as an alternative to this, it would be nice if we could figure out how to make perf_event_create_kernel_counter() fail on arm64 if NMIs aren't available. Maybe we could add a "must_use_nmi" element to "struct perf_event_attr"? (no changes since v4) Changes in v4: - ("Add a weak function for an arch to detect ...") new for v4. include/linux/nmi.h| 1 + kernel/watchdog_perf.c | 12 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 47db14e7da52..eb616fc07c85 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -210,6 +210,7 @@ static inline bool trigger_single_cpu_backtrace(int cpu) #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); +bool arch_perf_nmi_is_available(void); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index 349fcd4d2abc..8ea00c4a24b2 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -234,12 +234,22 @@ void __init hardlockup_detector_perf_restart(void) } } +bool __weak __init arch_perf_nmi_is_available(void) +{ + return true; +} + /** * watchdog_hardlockup_probe - Probe whether NMI event is available at all */ int __init watchdog_hardlockup_probe(void) { - int ret = hardlockup_detector_event_create(); + int ret; + + if (!arch_perf_nmi_is_available()) + return -ENODEV; + + ret = hardlockup_detector_event_create(); if (ret) { pr_info("Perf NMI watchdog permanently disabled\n"); -- 2.40.1.698.g37aff9b760-goog
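For illustration, a sketch of what an architecture override could look like (hedged: arm_pmu_irq_is_nmi() is the helper from the separate arm64 perf series, assumed here rather than provided by this patch):

	/* e.g. arch/arm64/kernel/watchdog_hld.c */
	bool __init arch_perf_nmi_is_available(void)
	{
		/*
		 * The perf hardlockup detector only works if the PMU
		 * interrupt is delivered as a real NMI on this platform.
		 */
		return arm_pmu_irq_is_nmi();
	}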
[PATCH v5 14/18] watchdog/hardlockup: detect hard lockups using secondary (buddy) CPUs
Implement a hardlockup detector that doesn't need any extra arch-specific support code to detect lockups. Instead of using something arch-specific we will use the buddy system, where each CPU watches out for another one. Specifically, each CPU will use its softlockup hrtimer to check that the next CPU is processing hrtimer interrupts by verifying that a counter is increasing.

NOTE: unlike the other hard lockup detectors, the buddy one can't easily show what's happening on the CPU that locked up just by doing a simple backtrace. It relies on some other mechanism in the system to get information about the locked up CPUs. This could be support for NMI backtraces like [1], it could be a mechanism for printing the PC of locked CPUs at panic time like [2] / [3], or it could be something else. Even though that means we still rely on arch-specific code, this arch-specific code seems to often be implemented even on architectures that don't have a hardlockup detector.

This style of hardlockup detector originated in some downstream Android trees and has been rebased on / carried in ChromeOS trees for quite a long time for use on arm and arm64 boards. Historically on these boards we've leveraged mechanism [2] / [3] to get information about hung CPUs, but we could move to [1].

Although the original motivation for the buddy system was for use on systems without an arch-specific hardlockup detector, it can still be useful to use even on systems that _do_ have an arch-specific hardlockup detector. On x86, for instance, there is a 24-part patch series [4] in progress switching the arch-specific hard lockup detector from a scarce perf counter to a less-scarce hardware resource. Potentially the buddy system could be a simpler alternative to free up the perf counter but still get hard lockup detection.

Overall, pros (+) and cons (-) of the buddy system compared to an arch-specific hardlockup detector (which might be implemented using perf):
+ The buddy system is usable on systems that don't have an arch-specific hardlockup detector, like arm32 and arm64 (though it's being worked on for arm64 [5]).
+ The buddy system may free up scarce hardware resources.
+ If a CPU totally goes out to lunch (can't process NMIs) the buddy system could still detect the problem (though it would be unlikely to be able to get a stack trace).
+ The buddy system uses the same timer function to pet the hardlockup detector on the running CPU as it uses to detect hardlockups on other CPUs. Compared to other hardlockup detectors, this means it generates fewer interrupts and thus is likely better able to let CPUs stay idle longer.
- If all CPUs are hard locked up at the same time the buddy system can't detect it.
- If we don't have SMP we can't use the buddy system.
- The buddy system needs an arch-specific mechanism (possibly NMI backtrace) to get info about the locked up CPU.
[1] https://lore.kernel.org/r/20230419225604.21204-1-diand...@chromium.org [2] https://issuetracker.google.com/172213129 [3] https://docs.kernel.org/trace/coresight/coresight-cpu-debug.html [4] https://lore.kernel.org/lkml/20230301234753.28582-1-ricardo.neri-calde...@linux.intel.com/ [5] https://lore.kernel.org/linux-arm-kernel/20220903093415.15850-1-lecopzer.c...@mediatek.com/ Signed-off-by: Colin Cross Signed-off-by: Matthias Kaehlcke Signed-off-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih Signed-off-by: Douglas Anderson --- This patch has been rebased in ChromeOS kernel trees many times, and each time someone had to do work on it they added their Signed-off-by. I've included those here. I've also left the author as Colin Cross since the core code is still his, even if it's now been reorganized a lot. (no changes since v4) Changes in v4: - Reworked atop a whole pile of cleanups, as suggested by Petr. Changes in v3: - Added a note in commit message about the effect on idle. - Cleaned up commit message pros/cons to be complete sentences. - More cpu => CPU (in Kconfig and comments). - No code changes other than comments. Changes in v2: - No code changes. - Reworked description and Kconfig based on v1 discussion. - cpu => CPU (in commit message). include/linux/nmi.h | 9 +++- kernel/Makefile | 1 + kernel/watchdog.c | 29 + kernel/watchdog_buddy.c | 93 + lib/Kconfig.debug | 52 +-- 5 files changed, 173 insertions(+), 11 deletions(-) create mode 100644 kernel/watchdog_buddy.c diff --git a/include/linux/nmi.h b/include/linux/nmi.h index c216e8a1be1f..47db14e7da52 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -87,8 +87,9 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif -#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +#if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER) void ar
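Reduced to a sketch, the check each CPU runs against its buddy from its own softlockup hrtimer (this mirrors the is_hardlockup() logic in watchdog.c that the series shares between detectors; treat it as a simplification, not the literal patch):

	static bool buddy_sees_hardlockup(unsigned int cpu)
	{
		int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));

		/*
		 * The buddy's counter hasn't moved since we last looked:
		 * it has taken no timer interrupts for a whole sample
		 * period, so report a hard lockup.
		 */
		if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
			return true;

		per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
		return false;
	}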
[PATCH v5 13/18] watchdog/hardlockup: Have the perf hardlockup use __weak functions more cleanly
The fact that watchdog_hardlockup_enable(), watchdog_hardlockup_disable(), and watchdog_hardlockup_probe() are declared __weak means that the configured hardlockup detector can define non-weak versions of those functions if it needs to. Instead of doing this, the perf hardlockup detector hooked itself into the default __weak implementation, which was a bit awkward. Clean this up.

From comments, it looks as if the original design was done because the __weak functions were expected to be implemented by the architecture and not by the configured hardlockup detector. This got awkward when we tried to add the buddy lockup detector, which was not arch-specific but wanted to hook into those same functions.

This is not expected to have any functional impact.

Reviewed-by: Petr Mladek
Signed-off-by: Douglas Anderson
---
(no changes since v4)

Changes in v4:
- ("Have the perf hardlockup use __weak ...") new for v4.

include/linux/nmi.h| 10 -- kernel/watchdog.c | 30 ++ kernel/watchdog_perf.c | 20 ++-- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 83076bf70ce8..c216e8a1be1f 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -103,21 +103,11 @@ static inline void arch_touch_nmi_watchdog(void) { } #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); -extern void hardlockup_detector_perf_disable(void); -extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); -extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } -static inline void hardlockup_detector_perf_disable(void) { } -static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } -# if !defined(CONFIG_HAVE_NMI_WATCHDOG) -static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } -# else -static inline int hardlockup_detector_perf_init(void) { return 0; } -# endif #endif void watchdog_hardlockup_stop(void); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 31548c0ae874..08ce046f636d 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -187,27 +187,33 @@ static inline void watchdog_hardlockup_kick(void) { } #endif /* !CONFIG_HARDLOCKUP_DETECTOR_PERF */ /* - * These functions can be overridden if an architecture implements its - * own hardlockup detector. + * These functions can be overridden based on the configured hardlockdup detector. * * watchdog_hardlockup_enable/disable can be implemented to start and stop when - * softlockup watchdog start and stop. The arch must select the + * softlockup watchdog start and stop. The detector must select the * SOFTLOCKUP_DETECTOR Kconfig. */ -void __weak watchdog_hardlockup_enable(unsigned int cpu) -{ - hardlockup_detector_perf_enable(); -} +void __weak watchdog_hardlockup_enable(unsigned int cpu) { } -void __weak watchdog_hardlockup_disable(unsigned int cpu) -{ - hardlockup_detector_perf_disable(); -} +void __weak watchdog_hardlockup_disable(unsigned int cpu) { } /* Return 0, if a hardlockup watchdog is available. Error code otherwise */ int __weak __init watchdog_hardlockup_probe(void) { - return hardlockup_detector_perf_init(); + /* +* If CONFIG_HAVE_NMI_WATCHDOG is defined then an architecture +* is assumed to have the hard watchdog available and we return 0.
+*/ + if (IS_ENABLED(CONFIG_HAVE_NMI_WATCHDOG)) + return 0; + + /* +* Hardlockup detectors other than those using CONFIG_HAVE_NMI_WATCHDOG +* are required to implement a non-weak version of this probe function +* to tell whether they are available. If they don't override then +* we'll return -ENODEV. +*/ + return -ENODEV; } /** diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index 9e6042a892b3..349fcd4d2abc 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -132,10 +132,14 @@ static int hardlockup_detector_event_create(void) } /** - * hardlockup_detector_perf_enable - Enable the local event + * watchdog_hardlockup_enable - Enable the local event + * + * @cpu: The CPU to enable hard lockup on. */ -void hardlockup_detector_perf_enable(void) +void watchdog_hardlockup_enable(unsigned int cpu) { + WARN_ON_ONCE(cpu != smp_processor_id()); + if (hardlockup_detector_event_create()) return; @@ -147,12 +151,16 @@ void hardlockup_detector_perf_enable(void) } /** - * hardlockup_detector_perf_disable - Disable the local event + * watchdog_hardlockup_disable - Disable the local event + * + * @cpu: The CPU to enable har
[PATCH v5 12/18] watchdog/hardlockup: Rename some "NMI watchdog" constants/function
Do a search and replace of: - NMI_WATCHDOG_ENABLED => WATCHDOG_HARDLOCKUP_ENABLED - SOFT_WATCHDOG_ENABLED => WATCHDOG_SOFTOCKUP_ENABLED - watchdog_nmi_ => watchdog_hardlockup_ - nmi_watchdog_available => watchdog_hardlockup_available - nmi_watchdog_user_enabled => watchdog_hardlockup_user_enabled - soft_watchdog_user_enabled => watchdog_softlockup_user_enabled - NMI_WATCHDOG_DEFAULT => WATCHDOG_HARDLOCKUP_DEFAULT Then update a few comments near where names were changed. This is specifically to make it less confusing when we want to introduce the buddy hardlockup detector, which isn't using NMIs. As part of this, we sanitized a few names for consistency. Signed-off-by: Douglas Anderson --- Changes in v5: - Found a few more names / comments to change. - Tried to make names more consistent as per v4 feedback. Changes in v4: - ("Rename some "NMI watchdog" constants/function ...") new for v4. arch/powerpc/include/asm/nmi.h| 4 +- arch/powerpc/kernel/watchdog.c| 12 +-- arch/powerpc/platforms/pseries/mobility.c | 4 +- arch/sparc/kernel/nmi.c | 4 +- include/linux/nmi.h | 18 ++-- kernel/watchdog.c | 113 +++--- kernel/watchdog_perf.c| 2 +- 7 files changed, 79 insertions(+), 78 deletions(-) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index c3c7adef74de..43bfd4de868f 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -5,10 +5,10 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); long soft_nmi_interrupt(struct pt_regs *regs); -void watchdog_nmi_set_timeout_pct(u64 pct); +void watchdog_hardlockup_set_timeout_pct(u64 pct); #else static inline void arch_touch_nmi_watchdog(void) {} -static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} +static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {} #endif #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dbcc4a793f0b..edb2dd1f53eb 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -438,7 +438,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return HRTIMER_NORESTART; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -479,7 +479,7 @@ static void start_watchdog(void *arg) return; } - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -546,7 +546,7 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_stop(void) +void watchdog_hardlockup_stop(void) { int cpu; @@ -554,7 +554,7 @@ void watchdog_nmi_stop(void) stop_watchdog_on_cpu(cpu); } -void watchdog_nmi_start(void) +void watchdog_hardlockup_start(void) { int cpu; @@ -566,7 +566,7 @@ void watchdog_nmi_start(void) /* * Invoked from core watchdog init. 
*/ -int __init watchdog_nmi_probe(void) +int __init watchdog_hardlockup_probe(void) { int err; @@ -582,7 +582,7 @@ int __init watchdog_nmi_probe(void) } #ifdef CONFIG_PPC_PSERIES -void watchdog_nmi_set_timeout_pct(u64 pct) +void watchdog_hardlockup_set_timeout_pct(u64 pct) { pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); WRITE_ONCE(wd_timeout_pct, pct); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 6f30113b5468..cd632ba9ebff 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -750,7 +750,7 @@ static int pseries_migrate_partition(u64 handle) goto out; if (factor) - watchdog_nmi_set_timeout_pct(factor); + watchdog_hardlockup_set_timeout_pct(factor); ret = pseries_suspend(handle); if (ret == 0) { @@ -766,7 +766,7 @@ static int pseries_migrate_partition(u64 handle) pseries_cancel_migration(handle, ret); if (factor) - watchdog_nmi_set_timeout_pct(0); + watchdog_hardlockup_set_timeout_pct(0); out: vas_migration_handler(VAS_RESUME); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 5dcf31f7e81f..9d9e29b75c43 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -282,7 +282,7 @@ __setup("nmi_watchdog=", setup_nmi_watchdog); * sparc specific NMI watchdog enable function. * Enables watchdog if it is not enabled already. */
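To make the renamed state handling concrete: watchdog_enabled is a bitmask and each detector is gated by its own bit. A minimal sketch, assuming the bit positions stay the same as with the old NMI_WATCHDOG_ENABLED / SOFT_WATCHDOG_ENABLED flags (want_hardlockup_checks() is a made-up helper for illustration, not part of the patch):

	/* Illustrative only; bit positions assumed unchanged by the rename. */
	#define WATCHDOG_HARDLOCKUP_ENABLED	(1 << 0)
	#define WATCHDOG_SOFTOCKUP_ENABLED	(1 << 1)

	static unsigned long watchdog_enabled;

	static bool want_hardlockup_checks(void)
	{
		/* Hardlockup work is skipped whenever this bit is clear. */
		return watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED;
	}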
[PATCH v5 11/18] watchdog/hardlockup: Move perf hardlockup watchdog petting to watchdog.c
In preparation for the buddy hardlockup detector, which wants the same petting logic as the current perf hardlockup detector, move the code to watchdog.c. While doing this, rename the global variable to match others nearby. As part of this change we have to change the code to account for the fact that the CPU we're running on might be different than the one we're checking. Currently the code in watchdog.c is guarded by CONFIG_HARDLOCKUP_DETECTOR_PERF, which makes this change seem silly. However, a future patch will change this. Signed-off-by: Douglas Anderson --- Changes in v5: - Fixed wrong __this_cpu to per_cpu (oops). - Move side effect (timestamp check ordering) to its own patch. - watchdog_hardlockup_touch => watchdog_hardlockup_touched. Changes in v4: - ("Move perf hardlockup watchdog petting to watchdog.c") new for v4. include/linux/nmi.h| 5 +++-- kernel/watchdog.c | 19 +++ kernel/watchdog_perf.c | 19 --- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 92aa568c0c42..e286a2a1902d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -88,7 +88,10 @@ static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +void arch_touch_nmi_watchdog(void); void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); +#elif !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline void arch_touch_nmi_watchdog(void) { } #endif #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) @@ -98,7 +101,6 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) -extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); @@ -113,7 +115,6 @@ static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } -static inline void arch_touch_nmi_watchdog(void) {} # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 2552e224f76a..64d7d2a0a7df 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -90,8 +90,22 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts); static DEFINE_PER_CPU(int, hrtimer_interrupts_saved); static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); +static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched); static unsigned long watchdog_hardlockup_all_cpu_dumped; +notrace void arch_touch_nmi_watchdog(void) +{ + /* +* Using __raw here because some code paths have +* preemption enabled. If preemption is enabled +* then interrupts should be enabled too, in which +* case we shouldn't have to worry about the watchdog +* going off. 
+*/ + raw_cpu_write(watchdog_hardlockup_touched, true); +} +EXPORT_SYMBOL(arch_touch_nmi_watchdog); + static bool is_hardlockup(unsigned int cpu) { int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); @@ -116,6 +130,11 @@ static void watchdog_hardlockup_kick(void) void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) { + if (per_cpu(watchdog_hardlockup_touched, cpu)) { + per_cpu(watchdog_hardlockup_touched, cpu) = false; + return; + } + /* * Check for a hardlockup by making sure the CPU's timer * interrupt is incrementing. The timer interrupt should have diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index 4e60e8023515..547917ebd5d3 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -20,26 +20,12 @@ #include #include -static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); static DEFINE_PER_CPU(struct perf_event *, dead_event); static struct cpumask dead_events_mask; static atomic_t watchdog_cpus = ATOMIC_INIT(0); -notrace void arch_touch_nmi_watchdog(void) -{ - /* -* Using __raw here because some code paths have -* preemption enabled. If preemption is enabled -* then interrupts should be enabled too, in which -* case we shouldn't have to worry about the watchdog -* going off. -*/ - raw_cpu_write(watchdog_nmi_touch, true); -} -EXPORT_SYMBOL(arch_touch_nmi_watchdog); - #ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP static DEFINE_PER_CPU(ktime_t, last_timestamp); static DEFINE_PER_CPU(unsigned int, nmi_rearmed); @@ -115,11 +101,6 @@ static void watchdog_overflow_callback(struct perf_
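Condensing the move above to its essence: the pet/check pair is a per-CPU flag with one writer (the petted CPU) and one reader (the checking CPU, which after the "cpu" parameter change in this series may be a different CPU). A sketch, using the names from the diff:

	static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);

	/* Producer: always runs on the CPU being petted. */
	notrace void arch_touch_nmi_watchdog(void)
	{
		raw_cpu_write(watchdog_hardlockup_touched, true);
	}

	/* Consumer: may run on a CPU other than @cpu. */
	void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
	{
		if (per_cpu(watchdog_hardlockup_touched, cpu)) {
			per_cpu(watchdog_hardlockup_touched, cpu) = false;
			return;		/* petted: skip one round of checking */
		}
		/* ... normal hardlockup checking ... */
	}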
[PATCH v5 10/18] watchdog/hardlockup: Add a "cpu" param to watchdog_hardlockup_check()
In preparation for the buddy hardlockup detector where the CPU checking for lockup might not be the currently running CPU, add a "cpu" parameter to watchdog_hardlockup_check(). As part of this change, make hrtimer_interrupts an atomic_t since now the CPU incrementing the value and the CPU reading the value might be different. Technically this could also be done with just READ_ONCE and WRITE_ONCE, but atomic_t feels a little cleaner in this case. While hrtimer_interrupts is made atomic_t, we change hrtimer_interrupts_saved from "unsigned long" to "int". The "int" is needed to match the data type backing atomic_t for hrtimer_interrupts. Even if this changes us from 64-bits to 32-bits (which I don't think is true for most compilers), it doesn't really matter. All we ever do is increment it every few seconds and compare it to an old value so 32-bits is fine (even 16-bits would be). The "signed" vs "unsigned" also doesn't matter for simple equality comparisons. hrtimer_interrupts_saved is _not_ switched to atomic_t nor even accessed with READ_ONCE / WRITE_ONCE. The hrtimer_interrupts_saved is always consistently accessed by the same CPU. NOTE: with the upcoming "buddy" detector there is one special case. When a CPU goes offline/online then we can change which CPU is the one to consistently access a given instance of hrtimer_interrupts_saved. We still can't end up with a partially updated hrtimer_interrupts_saved, however, because we end up petting all affected CPUs to make sure the new and old CPU can't end up somehow reading/writing hrtimer_interrupts_saved at the same time. Signed-off-by: Douglas Anderson --- Changes in v5: - Don't dump stack on the buddy CPU if we fail to backtrace the hung CPU. - Use atomic_t for hrtimer_interrupts. Changes in v4: - ("Add a "cpu" param to watchdog_hardlockup_check()") new for v4. include/linux/nmi.h| 2 +- kernel/watchdog.c | 52 ++ kernel/watchdog_perf.c | 2 +- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0c62c1bf0a71..92aa568c0c42 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -88,7 +88,7 @@ static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) -void watchdog_hardlockup_check(struct pt_regs *regs); +void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); #endif #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 169e5dffbc00..2552e224f76a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -87,29 +87,34 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); -static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts); +static DEFINE_PER_CPU(int, hrtimer_interrupts_saved); static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); static unsigned long watchdog_hardlockup_all_cpu_dumped; -static bool is_hardlockup(void) +static bool is_hardlockup(unsigned int cpu) { - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); + int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu)); - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) return true; - __this_cpu_write(hrtimer_interrupts_saved, hrint); + /* +* NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE +* for hrtimer_interrupts_saved.
hrtimer_interrupts_saved is +* written/read by a single CPU. +*/ + per_cpu(hrtimer_interrupts_saved, cpu) = hrint; return false; } static void watchdog_hardlockup_kick(void) { - __this_cpu_inc(hrtimer_interrupts); + atomic_inc(raw_cpu_ptr(&hrtimer_interrupts)); } -void watchdog_hardlockup_check(struct pt_regs *regs) +void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) { /* * Check for a hardlockup by making sure the CPU's timer @@ -117,35 +122,42 @@ void watchdog_hardlockup_check(struct pt_regs *regs) * fired multiple times before we overflow'd. If it hasn't * then this is a good indication the cpu is stuck */ - if (is_hardlockup()) { + if (is_hardlockup(cpu)) { unsigned int this_cpu = smp_processor_id(); + struct cpumask backtrace_mask = *cpu_online_mask; /* Only print hardlockups once. */ - if (__this_cpu_read(watchdog_hardlockup_warned)) + if (per_cpu(watchdog_hardlockup_warned, cpu)) return
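The atomic_t choice above boils down to an increment on one CPU paired with a read from a possibly different CPU. A minimal sketch of the pattern (read_kick_count() is a made-up name for illustration; the real reader is is_hardlockup() in the diff):

	static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts);

	/* Runs on the CPU whose liveness is being proven. */
	static void watchdog_hardlockup_kick(void)
	{
		atomic_inc(raw_cpu_ptr(&hrtimer_interrupts));
	}

	/* May run on any CPU; atomic_t keeps this cross-CPU read,
	 * paired with the increment above, well-defined. */
	static int read_kick_count(unsigned int cpu)
	{
		return atomic_read(&per_cpu(hrtimer_interrupts, cpu));
	}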
[PATCH v5 09/18] watchdog/hardlockup: Style changes to watchdog_hardlockup_check() / is_hardlockup()
These are tiny style changes: - Add a blank line before a "return". - Renames two globals to use the "watchdog_hardlockup" prefix. - Store processor id in "unsigned int" rather than "int". - Minor comment rewording. - Use "else" rather than extra returns since it seemed more symmetric. Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- Changes in v5: - watchdog_hardlockup_dumped_stacks => watchdog_hardlockup_all_cpu_dumped - watchdog_hardlockup_processed => watchdog_hardlockup_warned Changes in v4: - ("Style changes to watchdog_hardlockup_check ...") new for v4. kernel/watchdog.c | 32 +++- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 12ce37d76e7d..169e5dffbc00 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -89,8 +89,8 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); -static DEFINE_PER_CPU(bool, hard_watchdog_warn); -static unsigned long hardlockup_allcpu_dumped; +static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned); +static unsigned long watchdog_hardlockup_all_cpu_dumped; static bool is_hardlockup(void) { @@ -100,6 +100,7 @@ static bool is_hardlockup(void) return true; __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; } @@ -110,21 +111,20 @@ static void watchdog_hardlockup_kick(void) void watchdog_hardlockup_check(struct pt_regs *regs) { - /* check for a hardlockup -* This is done by making sure our timer interrupt -* is incrementing. The timer interrupt should have -* fired multiple times before we overflow'd. If it hasn't + /* +* Check for a hardlockup by making sure the CPU's timer +* interrupt is incrementing. The timer interrupt should have +* fired multiple times before we overflow'd. If it hasn't * then this is a good indication the cpu is stuck */ if (is_hardlockup()) { - int this_cpu = smp_processor_id(); + unsigned int this_cpu = smp_processor_id(); - /* only print hardlockups once */ - if (__this_cpu_read(hard_watchdog_warn) == true) + /* Only print hardlockups once. */ + if (__this_cpu_read(watchdog_hardlockup_warned)) return; - pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", -this_cpu); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); print_modules(); print_irqtrace_events(current); if (regs) @@ -137,18 +137,16 @@ void watchdog_hardlockup_check(struct pt_regs *regs) * generating interleaving traces */ if (sysctl_hardlockup_all_cpu_backtrace && - !test_and_set_bit(0, &hardlockup_allcpu_dumped)) + !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped)) trigger_allbutself_cpu_backtrace(); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); - __this_cpu_write(hard_watchdog_warn, true); - return; + __this_cpu_write(watchdog_hardlockup_warned, true); + } else { + __this_cpu_write(watchdog_hardlockup_warned, false); } - - __this_cpu_write(hard_watchdog_warn, false); - return; } #else /* CONFIG_HARDLOCKUP_DETECTOR_PERF */ -- 2.40.1.698.g37aff9b760-goog
[PATCH v5 08/18] watchdog/hardlockup: Move perf hardlockup checking/panic to common watchdog.c
The perf hardlockup detector works by looking at interrupt counts and seeing if they change from run to run. The interrupt counts are managed by the common watchdog code via its watchdog_timer_fn(). Currently the API between the perf detector and the common code is a function: is_hardlockup(). When the hard lockup detector sees that function return true then it handles printing out debug info and inducing a panic if necessary. Let's change the API a little bit in preparation for the buddy hardlockup detector. The buddy hardlockup detector wants to print nearly the same debug info and have nearly the same panic behavior. That means we want to move all that code to the common file. For now, the code in the common file will only be there if the perf hardlockup detector is enabled, but eventually it will be selected by a common config. Right now, this _just_ moves the code from the perf detector file to the common file and changes the names. It doesn't make the changes that the buddy hardlockup detector will need and doesn't do any style cleanups. A future patch will do cleanup to make it more obvious what changed. With the above, we no longer have any callers of is_hardlockup() outside of the "watchdog.c" file, so we can remove it from the header, make it static, and move it to the same "#ifdef" block as our new watchdog_hardlockup_check(). While doing this, it can be noted that the existing code compiled in the counting/checking of "hrtimer_interrupts" even when no hardlockup detector was configured. We didn't need to do that, so move all the "hrtimer_interrupts" counting to only be there if the perf hardlockup detector is configured as well. This change is expected to be a no-op. Signed-off-by: Douglas Anderson --- Changes in v5: - watchdog_hardlockup_interrupt_count() => watchdog_hardlockup_kick() - watchdog_hardlockup_is_lockedup() => is_hardlockup() Changes in v4: - ("Move perf hardlockup checking/panic ...") new for v4.
include/linux/nmi.h| 5 ++- kernel/watchdog.c | 93 +- kernel/watchdog_perf.c | 42 +-- 3 files changed, 78 insertions(+), 62 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index fafab128f37e..0c62c1bf0a71 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -15,7 +15,6 @@ void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); -bool is_hardlockup(void); extern int watchdog_user_enabled; extern int nmi_watchdog_user_enabled; @@ -88,6 +87,10 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +void watchdog_hardlockup_check(struct pt_regs *regs); +#endif + #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) # define NMI_WATCHDOG_SYSCTL_PERM 0644 #else diff --git a/kernel/watchdog.c b/kernel/watchdog.c index c705a18b26bf..12ce37d76e7d 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -85,6 +85,78 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); #endif /* CONFIG_HARDLOCKUP_DETECTOR */ +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) + +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(bool, hard_watchdog_warn); +static unsigned long hardlockup_allcpu_dumped; + +static bool is_hardlockup(void) +{ + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); + + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + return true; + + __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; +} + +static void watchdog_hardlockup_kick(void) +{ + __this_cpu_inc(hrtimer_interrupts); +} + +void watchdog_hardlockup_check(struct pt_regs *regs) +{ + /* check for a hardlockup +* This is done by making sure our timer interrupt +* is incrementing. The timer interrupt should have +* fired multiple times before we overflow'd. If it hasn't +* then this is a good indication the cpu is stuck +*/ + if (is_hardlockup()) { + int this_cpu = smp_processor_id(); + + /* only print hardlockups once */ + if (__this_cpu_read(hard_watchdog_warn) == true) + return; + + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", +this_cpu); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + + /* +* Perform all-CPU dump only once to avoi
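To put rough numbers on the "fired multiple times" comment (my arithmetic, based on the sample-period math elsewhere in watchdog.c, not stated in this patch): with the default watchdog_thresh of 10, the softlockup hrtimer that increments "hrtimer_interrupts" fires about every 4 seconds (the softlockup threshold of 2 * watchdog_thresh seconds divided by 5), while the perf event that calls watchdog_hardlockup_check() fires about every 10 seconds. A healthy CPU should therefore have bumped its count two or three times between consecutive checks; a count that hasn't moved at all is strong evidence that the CPU is stuck with interrupts off.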
[PATCH v5 07/18] watchdog/perf: Rename watchdog_hld.c to watchdog_perf.c
The code currently in "watchdog_hld.c" is for detecting hardlockups using perf, as evidenced by the line in the Makefile that only compiles this file if CONFIG_HARDLOCKUP_DETECTOR_PERF is defined. Rename the file to prepare for the buddy hardlockup detector, which doesn't use perf. It could be argued that the new name makes it less obvious that this is a hardlockup detector. While true, it's not hard to remember that the "perf" detector is always a hardlockup detector and it's nice not to have names that are too convoluted. Acked-by: Nicholas Piggin Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- (no changes since v4) Changes in v4: - ("Rename watchdog_hld.c to watchdog_perf.c") new for v4. kernel/Makefile| 2 +- kernel/{watchdog_hld.c => watchdog_perf.c} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename kernel/{watchdog_hld.c => watchdog_perf.c} (99%) diff --git a/kernel/Makefile b/kernel/Makefile index b69c95315480..7eb72033143c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,7 +91,7 @@ obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o -obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_perf.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_perf.c similarity index 99% rename from kernel/watchdog_hld.c rename to kernel/watchdog_perf.c index 2125b09e09d7..8b8015758ea5 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_perf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Detect hard lockups on a system + * Detect hard lockups on a system using perf * * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. * -- 2.40.1.698.g37aff9b760-goog
[PATCH v5 06/18] watchdog/hardlockup: Add comments to touch_nmi_watchdog()
In preparation for the buddy hardlockup detector, add comments to touch_nmi_watchdog() to make it obvious that it touches the configured hardlockup detector regardless of whether it's backed by an NMI. Also note that arch_touch_nmi_watchdog() may not be architecture-specific. Ideally, we'd like to rename these functions but that is a fairly disruptive change touching a lot of drivers. After discussion [1] the plan is to defer this until a good time. [1] https://lore.kernel.org/r/ZFy0TX1tfhlH8gxj@alley Signed-off-by: Douglas Anderson --- Changes in v5: - No longer rename touch_nmi_watchdog(), just add comments. Changes in v4: - ("Rename touch_nmi_watchdog() to ...") new for v4. include/linux/nmi.h | 23 +++ 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 454fe99c4874..fafab128f37e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -125,15 +125,30 @@ void watchdog_nmi_disable(unsigned int cpu); void lockup_detector_reconfigure(void); /** - * touch_nmi_watchdog - restart NMI watchdog timeout. + * touch_nmi_watchdog - manually pet the hardlockup watchdog. * - * If the architecture supports the NMI watchdog, touch_nmi_watchdog() - * may be used to reset the timeout - for code which intentionally - * disables interrupts for a long time. This call is stateless. + * If we support detecting hardlockups, touch_nmi_watchdog() may be + * used to pet the watchdog (reset the timeout) - for code which + * intentionally disables interrupts for a long time. This call is stateless. + * + * Though this function has "nmi" in the name, the hardlockup watchdog might + * not be backed by NMIs. This function will likely be renamed to + * touch_hardlockup_watchdog() in the future. */ static inline void touch_nmi_watchdog(void) { + /* +* Pass on to the hardlockup detector selected via CONFIG_. Note that +* the hardlockup detector may not be arch-specific nor using NMIs +* and the arch_touch_nmi_watchdog() function will likely be renamed +* in the future. +*/ arch_touch_nmi_watchdog(); + + /* +* Touching the hardlockup detector implicitly pets the +* softlockup detector too +*/ touch_softlockup_watchdog(); } -- 2.40.1.698.g37aff9b760-goog
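As a usage sketch of the semantics documented above (the caller is hypothetical, as are more_data_to_flush() and emit_one_char()): code that intentionally runs for a long time with interrupts off, but knows it is making progress, pets both detectors with one call:

	#include <linux/nmi.h>

	/* Hypothetical caller: flushing a slow console with IRQs off. */
	static void slow_console_flush(void)
	{
		while (more_data_to_flush()) {
			emit_one_char();
			/* We're making progress; keep the hardlockup (and,
			 * implicitly, the softlockup) detector from firing. */
			touch_nmi_watchdog();
		}
	}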
[PATCH v5 05/18] watchdog/perf: Ensure CPU-bound context when creating hardlockup detector event
From: Pingfan Liu hardlockup_detector_event_create() should create perf_event on the current CPU. Preemption cannot be disabled because perf_event_create_kernel_counter() allocates memory. Instead, CPU locality is achieved by running the code in a per-CPU bound kthread. Add a check to prevent mistakes when calling the code in another code path. Signed-off-by: Pingfan Liu Co-developed-by: Lecopzer Chen Signed-off-by: Lecopzer Chen Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. This is a nice cleanup and could land together with the rest of my series if that makes sense. I changed the patch prefix to match others in my series. [1] https://lore.kernel.org/r/20220903093415.15850-4-lecopzer.c...@mediatek.com/ (no changes since v4) Changes in v4: - Pulled ("Ensure CPU-bound context when creating ...") into my series for v4. kernel/watchdog_hld.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 1e8a49dc956e..2125b09e09d7 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -165,10 +165,16 @@ static void watchdog_overflow_callback(struct perf_event *event, static int hardlockup_detector_event_create(void) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu; struct perf_event_attr *wd_attr; struct perf_event *evt; + /* +* Preemption is not disabled because memory will be allocated. +* Ensure CPU-locality by calling this in per-CPU kthread. +*/ + WARN_ON(!is_percpu_thread()); + cpu = raw_smp_processor_id(); wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); -- 2.40.1.698.g37aff9b760-goog
[PATCH v5 04/18] watchdog/hardlockup: change watchdog_nmi_enable() to void
From: Lecopzer Chen Nobody cares about the return value of watchdog_nmi_enable(), so change its prototype to void. Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Reviewed-by: Petr Mladek Acked-by: Nicholas Piggin Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. This is a nice cleanup and could land together with the rest of my series if that makes sense. I changed the patch prefix to match others in my series. [1] https://lore.kernel.org/r/20220903093415.15850-3-lecopzer.c...@mediatek.com/ (no changes since v4) Changes in v4: - Pulled ("change watchdog_nmi_enable() to void") into my series for v4. arch/sparc/kernel/nmi.c | 8 +++- include/linux/nmi.h | 2 +- kernel/watchdog.c | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a305..5dcf31f7e81f 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -282,11 +282,11 @@ __setup("nmi_watchdog=", setup_nmi_watchdog); * sparc specific NMI watchdog enable function. * Enables watchdog if it is not enabled already. */ -int watchdog_nmi_enable(unsigned int cpu) +void watchdog_nmi_enable(unsigned int cpu) { if (atomic_read(&nmi_active) == -1) { pr_warn("NMI watchdog cannot be enabled or disabled\n"); - return -1; + return; } /* @@ -295,11 +295,9 @@ int watchdog_nmi_enable(unsigned int cpu) * process first. */ if (!nmi_init_done) - return 0; + return; smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); - - return 0; } /* * sparc specific NMI watchdog disable function. diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 771d77b62bc1..454fe99c4874 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -119,7 +119,7 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); -int watchdog_nmi_enable(unsigned int cpu); +void watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); void lockup_detector_reconfigure(void); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 582d572e1379..c705a18b26bf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -93,10 +93,9 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); * softlockup watchdog start and stop. The arch must select the * SOFTLOCKUP_DETECTOR Kconfig. */ -int __weak watchdog_nmi_enable(unsigned int cpu) +void __weak watchdog_nmi_enable(unsigned int cpu) { hardlockup_detector_perf_enable(); - return 0; } void __weak watchdog_nmi_disable(unsigned int cpu) -- 2.40.1.698.g37aff9b760-goog
[PATCH v5 03/18] watchdog: remove WATCHDOG_DEFAULT
From: Lecopzer Chen No reference to WATCHDOG_DEFAULT, remove it. Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. This is a nice cleanup and could land together with the rest of my series if that makes sense. I changed the patch prefix to match others in my series. [1] https://lore.kernel.org/r/20220903093415.15850-2-lecopzer.c...@mediatek.com/ (no changes since v4) Changes in v4: - Pulled ("remove WATCHDOG_DEFAULT") into my series for v4. kernel/watchdog.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8e61f21e7e33..582d572e1379 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -30,10 +30,8 @@ static DEFINE_MUTEX(watchdog_mutex); #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) -# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED) # define NMI_WATCHDOG_DEFAULT 1 #else -# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED) # define NMI_WATCHDOG_DEFAULT 0 #endif -- 2.40.1.698.g37aff9b760-goog
[PATCH v5 02/18] watchdog/perf: More properly prevent false positives with turbo modes
Currently, in the watchdog_overflow_callback() we first check to see if the watchdog had been touched and _then_ we handle the workaround for turbo mode. This order should be reversed. Specifically, "touching" the hardlockup detector's watchdog should prevent lockups from being detected for one period, and that period should be roughly the same regardless of whether we're running turbo or not. That means that we should do the extra accounting for turbo _before_ we look at (and clear) the global indicating that we've been touched. NOTE: this fix is made based on code inspection. I am not aware of any reports where the old code would have generated false positives. That being said, this order seems more correct and also makes it easier down the line to share code with the "buddy" hardlockup detector. Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") Signed-off-by: Douglas Anderson --- Changes in v5: - ("More properly prevent false ...") promoted to its own patch for v5. kernel/watchdog_hld.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b1582c..1e8a49dc956e 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -114,14 +114,14 @@ static void watchdog_overflow_callback(struct perf_event *event, /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; + if (!watchdog_check_timestamp()) + return; + if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; } - if (!watchdog_check_timestamp()) - return; - /* check for a hardlockup * This is done by making sure our timer interrupt * is incrementing. The timer interrupt should have -- 2.40.1.698.g37aff9b760-goog
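To sketch the failure mode being defended against (hypothetical timeline, not a reported case): in a turbo burst the perf NMI can fire several times in quick succession. With the old ordering, the first NMI of the burst consumed the one-shot watchdog_nmi_touch flag, and watchdog_check_timestamp() then discarded that same NMI anyway; the next genuine check therefore ran without the protection the touch was meant to provide. With the new ordering, burst NMIs are discarded before the flag is examined, so the touch still covers one real check period.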
[PATCH v5 00/18] watchdog/hardlockup: Add the buddy hardlockup detector
") new for v4. - ("Add a weak function for an arch to detect ...") new for v4. - ("Define dummy watchdog_update_hrtimer_threshold() ...") new for v4. - ("Have the perf hardlockup use __weak ...") new for v4. - ("Move perf hardlockup checking/panic ...") new for v4. - ("Move perf hardlockup watchdog petting to watchdog.c") new for v4. - ("Rename some "NMI watchdog" constants/function ...") new for v4. - ("Rename touch_nmi_watchdog() to ...") new for v4. - ("Rename watchdog_hld.c to watchdog_perf.c") new for v4. - ("Style changes to watchdog_hardlockup_check ...") new for v4. - Pulled ("Adapt the watchdog_hld interface ...") into my series for v4. - Pulled ("Enable perf events based hard ...") into my series for v4. - Pulled ("Ensure CPU-bound context when creating ...") into my series for v4. - Pulled ("add hw_nmi_get_sample_period for ...") into my series for v4. - Pulled ("change watchdog_nmi_enable() to void") into my series for v4. - Pulled ("remove WATCHDOG_DEFAULT") into my series for v4. - Reworked atop a whole pile of cleanups, as suggested by Petr. Changes in v3: - Added a note in commit message about the effect on idle. - Cleaned up commit message pros/cons to be complete sentences. - More cpu => CPU (in Kconfig and comments). - No code changes other than comments. Changes in v2: - No code changes. - Reworked description and Kconfig based on v1 discussion. - cpu => CPU (in commit message). Douglas Anderson (13): watchdog/perf: Define dummy watchdog_update_hrtimer_threshold() on correct config watchdog/perf: More properly prevent false positives with turbo modes watchdog/hardlockup: Add comments to touch_nmi_watchdog() watchdog/perf: Rename watchdog_hld.c to watchdog_perf.c watchdog/hardlockup: Move perf hardlockup checking/panic to common watchdog.c watchdog/hardlockup: Style changes to watchdog_hardlockup_check() / is_hardlockup() watchdog/hardlockup: Add a "cpu" param to watchdog_hardlockup_check() watchdog/hardlockup: Move perf hardlockup watchdog petting to watchdog.c watchdog/hardlockup: Rename some "NMI watchdog" constants/function watchdog/hardlockup: Have the perf hardlockup use __weak functions more cleanly watchdog/hardlockup: detect hard lockups using secondary (buddy) CPUs watchdog/perf: Add a weak function for an arch to detect if perf can use NMIs arm64: Enable perf events based hard lockup detector Lecopzer Chen (4): watchdog: remove WATCHDOG_DEFAULT watchdog/hardlockup: change watchdog_nmi_enable() to void watchdog/perf: Adapt the watchdog_perf interface for async model arm64: add hw_nmi_get_sample_period for preparation of lockup detector Pingfan Liu (1): watchdog/perf: Ensure CPU-bound context when creating hardlockup detector event arch/arm64/Kconfig | 2 + arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/watchdog_hld.c | 36 +++ arch/powerpc/include/asm/nmi.h | 4 +- arch/powerpc/kernel/watchdog.c | 12 +- arch/powerpc/platforms/pseries/mobility.c | 4 +- arch/sparc/kernel/nmi.c| 10 +- drivers/perf/arm_pmu.c | 5 + drivers/perf/arm_pmuv3.c | 12 +- include/linux/nmi.h| 73 +++-- include/linux/perf/arm_pmu.h | 2 + kernel/Makefile| 3 +- kernel/watchdog.c | 344 +++-- kernel/watchdog_buddy.c| 93 ++ kernel/{watchdog_hld.c => watchdog_perf.c} | 105 +++ lib/Kconfig.debug | 52 +++- 16 files changed, 550 insertions(+), 208 deletions(-) create mode 100644 arch/arm64/kernel/watchdog_hld.c create mode 100644 kernel/watchdog_buddy.c rename kernel/{watchdog_hld.c => watchdog_perf.c} (72%) -- 2.40.1.698.g37aff9b760-goog
[PATCH v5 01/18] watchdog/perf: Define dummy watchdog_update_hrtimer_threshold() on correct config
The real watchdog_update_hrtimer_threshold() is defined in kernel/watchdog_hld.c. That file is only compiled if CONFIG_HARDLOCKUP_DETECTOR_PERF is set, and the function is only defined in that file if CONFIG_HARDLOCKUP_CHECK_TIMESTAMP is set. The dummy version of the function in "nmi.h" didn't get that quite right. While this doesn't appear to be a huge deal, it's nice to make it consistent. It doesn't break builds because CHECK_TIMESTAMP is only defined by x86, so other architectures don't get a double definition, and x86 uses the perf lockup detector, so it gets the out-of-line version. Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") Reviewed-by: Nicholas Piggin Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- Changes in v5: - Add Nicholas's explanation of why this didn't break builds. - watchdog_hardlockup_perf.c => kernel/watchdog_hld.c in description. Changes in v4: - ("Define dummy watchdog_update_hrtimer_threshold() ...") new for v4. include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 048c0b9aa623..771d77b62bc1 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -197,7 +197,7 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ -defined(CONFIG_HARDLOCKUP_DETECTOR) +defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) void watchdog_update_hrtimer_threshold(u64 period); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif -- 2.40.1.698.g37aff9b760-goog
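Enumerating the config combinations makes the fix easier to see (my summary): (a) CHECK_TIMESTAMP and DETECTOR_PERF both set, which is x86 today: the out-of-line function in kernel/watchdog_hld.c exists and the prototype should be declared; (b) DETECTOR_PERF set but CHECK_TIMESTAMP unset: the file is built but the function isn't defined in it, so the header must supply the empty inline; (c) DETECTOR_PERF unset: the file isn't built at all, so again the inline is needed. Only case (a) should see the prototype, which is exactly what the corrected "#if defined(...) && defined(...)" expresses.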
[PATCH v4 17/17] arm64: Enable perf events based hard lockup detector
With the recent feature added to enable perf events to use pseudo NMIs as interrupts on platforms which support GICv3 or later, it's now possible to enable the hard lockup detector (or NMI watchdog) on arm64 platforms. So enable the corresponding support. One thing to note here is that normally the lockup detector is initialized just after the early initcalls, but the PMU on arm64 comes up much later as a device_initcall(). To cope with that, override arch_perf_nmi_is_available() to let the watchdog framework know the PMU is not ready, and inform the framework to re-initialize lockup detection once the PMU has been initialized. Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Co-developed-by: Pingfan Liu Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. As part of making this match with my series, I needed to resolve conflicts with the patch ("watchdog/hardlockup: Have the perf hardlockup use __weak functions more cleanly"). This makes ${SUBJECT} patch now depend on the patch ("watchdog/perf: Add a weak function for an arch to detect if perf can use NMIs"). As talked about in that patch, there may be better alternatives to accomplish the same thing. As mentioned in the cover letter, I'm not really expecting this patch to land together with the patches for the buddy detector. I included it with my series simply for convenience of testing both series together. NOTE: the previous patch posted by Lecopzer pointed to Sumit's patch [2] in the commit text but provided no context. I moved it to this "after the cut" note. [1] https://lore.kernel.org/r/20220903093415.15850-7-lecopzer.c...@mediatek.com/ [2] http://lore.kernel.org/linux-arm-kernel/1610712101-14929-1-git-send-email-sumit.g...@linaro.org Changes in v4: - Pulled ("Enable perf events based hard ...") into my series for v4. arch/arm64/Kconfig | 2 ++ arch/arm64/kernel/perf_event.c | 12 ++-- arch/arm64/kernel/watchdog_hld.c | 12 drivers/perf/arm_pmu.c | 5 + include/linux/perf/arm_pmu.h | 2 ++ 5 files changed, 31 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1023e896d46b..4ee9f3a7bc7a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -197,12 +197,14 @@ config ARM64 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM select HAVE_NMI select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_PREEMPT_DYNAMIC_KEY diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index dde06c0f97f3..732bc5d76cb1 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -23,6 +23,7 @@ #include #include #include +#include /* ARMv8 Cortex-A53 specific event types.
*/ #define ARMV8_A53_PERFCTR_PREF_LINEFILL0xC2 @@ -1396,10 +1397,17 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { + int ret; + if (acpi_disabled) - return platform_driver_register(&armv8_pmu_driver); + ret = platform_driver_register(&armv8_pmu_driver); else - return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); + ret = arm_pmu_acpi_probe(armv8_pmuv3_pmu_init); + + if (!ret) + lockup_detector_retry_init(); + + return ret; } device_initcall(armv8_pmu_driver_init) diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c index 2401eb1b7e55..dcd25322127c 100644 --- a/arch/arm64/kernel/watchdog_hld.c +++ b/arch/arm64/kernel/watchdog_hld.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include +#include /* * Safe maximum CPU frequency in case a particular platform doesn't implement @@ -22,3 +24,13 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) return (u64)max_cpu_freq * watchdog_thresh; } + +bool __init arch_perf_nmi_is_available(void) +{ + /* +* hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off, +* however, the pmu interrupts will act like a normal interrupt instead of +* NMI and the hardlockup detector would be broken. +*/ + return arm_pmu_irq_is_nmi(); +} diff --g
[PATCH v4 16/17] arm64: add hw_nmi_get_sample_period for preparation of lockup detector
From: Lecopzer Chen Set a safe maximum CPU frequency of 5 GHz in case a particular platform doesn't implement a cpufreq driver. Although the architecture doesn't put any restrictions on the maximum frequency, 5 GHz seems to be a safe maximum given that the Arm CPUs available in the market are clocked much less than 5 GHz. On the other hand, we can't make it much higher as it would lead to a large hard-lockup detection timeout on parts which are running slower (eg. 1GHz on Developerbox) and don't possess a cpufreq driver. Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Co-developed-by: Pingfan Liu Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. The only change I made here was to remove an extra blank line that checkpatch complained about. As mentioned in the cover letter, I'm not really expecting this patch to land together with the patches for the buddy detector. I included it with my series simply for convenience of testing both series together. NOTE: the previous patch posted by Lecopzer pointed to Sumit's patch [2] in the commit text but provided no context. I moved it to this "after the cut" note. [1] https://lore.kernel.org/r/20220903093415.15850-6-lecopzer.c...@mediatek.com/ [2] http://lore.kernel.org/linux-arm-kernel/1610712101-14929-1-git-send-email-sumit.g...@linaro.org Changes in v4: - Pulled ("add hw_nmi_get_sample_period for ...") into my series for v4. arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/watchdog_hld.c | 24 2 files changed, 25 insertions(+) create mode 100644 arch/arm64/kernel/watchdog_hld.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index ceba6792f5b3..f8e2645406a4 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_ARM64_MODULE_PLTS)+= module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c new file mode 100644 index ..2401eb1b7e55 --- /dev/null +++ b/arch/arm64/kernel/watchdog_hld.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +/* + * Safe maximum CPU frequency in case a particular platform doesn't implement + * cpufreq driver. Although, architecture doesn't put any restrictions on + * maximum frequency but 5 GHz seems to be safe maximum given the available + * Arm CPUs in the market which are clocked much less than 5 GHz. On the other + * hand, we can't make it much higher as it would lead to a large hard-lockup + * detection timeout on parts which are running slower (eg. 1GHz on + * Developerbox) and doesn't possess a cpufreq driver. + */ +#define SAFE_MAX_CPU_FREQ 50UL // 5 GHz +u64 hw_nmi_get_sample_period(int watchdog_thresh) { + unsigned int cpu = smp_processor_id(); + unsigned long max_cpu_freq; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + max_cpu_freq = SAFE_MAX_CPU_FREQ; + + return (u64)max_cpu_freq * watchdog_thresh; +} -- 2.40.1.521.gf1e218fcd8-goog
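Working through the math with made-up numbers: cpufreq_get_hw_max_freq() reports kHz, so a part with a 2 GHz max reports 2,000,000; multiplying by 1000UL gives 2e9 Hz, and with the default watchdog_thresh of 10 the returned sample period is 2e10 cycles, i.e. the PMU cycle counter overflows after roughly 10 seconds of full-speed execution. On a part with no cpufreq driver, the 5 GHz fallback yields 5e10 cycles, which on silicon actually running at 1 GHz stretches the detection timeout to roughly 50 seconds; that is the trade-off the comment describes.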
[PATCH v4 15/17] watchdog/perf: Adapt the watchdog_perf interface for async model
From: Lecopzer Chen When lockup_detector_init()->watchdog_hardlockup_probe() runs, the PMU may not be ready yet. E.g. on arm64, the PMU is not ready until device_initcall(armv8_pmu_driver_init). And it is deeply integrated with the driver model and cpuhp. Hence it is hard to push this initialization before smp_init(). But it is easy to take the opposite approach and try to initialize the watchdog once again later. The delayed probe is called using workqueues. It needs to allocate memory and must proceed in a normal context. The delayed probe is used if watchdog_hardlockup_probe() returns a non-zero value, the return code used when the PMU is not ready yet. Provide an API, lockup_detector_retry_init(), for anyone who needs to retry initializing the lockup detector after failing in lockup_detector_init(). The original assumption is: nobody should use the delayed probe after lockup_detector_check(), which has the __init attribute. That is, anyone using this API must call it between lockup_detector_init() and lockup_detector_check(), and the caller must have the __init attribute. Reviewed-by: Petr Mladek Co-developed-by: Pingfan Liu Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Suggested-by: Petr Mladek Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. As part of making this match with my series, I resolved a few conflicts and did a few renames. I also fixed the kernel test robot yell [2] by providing a dummy implementation of the retry function if CONFIG_LOCKUP_DETECTOR wasn't defined. That led me to move the definition of the function and sanitize the name of the function to match others around it. This patch makes less sense without the patches to add support for the arm64 perf hardlockup detector. Thus, I've put it at the end of the series. I removed the "kernel test robot" tag since that didn't really make sense. Presumably the robot found a problem on a previous version of the patch, but that's not normally a reason to add the Reported-by. [1] https://lore.kernel.org/r/20220903093415.15850-5-lecopzer.c...@mediatek.com/ [2] https://lore.kernel.org/r/202209050639.jdawd49e-...@intel.com/ Changes in v4: - Pulled ("Adapt the watchdog_hld interface ...") into my series for v4.
include/linux/nmi.h | 2 ++ kernel/watchdog.c | 67 - 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 9caea5ba494d..3cacc5fc16b9 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -13,6 +13,7 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_retry_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); @@ -34,6 +35,7 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } +static inline void lockup_detector_retry_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 678d55172ef4..55471634d932 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -211,7 +211,13 @@ void __weak watchdog_hardlockup_enable(unsigned int cpu) { } void __weak watchdog_hardlockup_disable(unsigned int cpu) { } -/* Return 0, if a hardlockup watchdog is available. Error code otherwise */ +/* + * Watchdog-detector specific API. + * + * Return 0 when hardlockup watchdog is available, negative value otherwise. + * Note that the negative value means that a delayed probe might + * succeed later. + */ int __weak __init watchdog_hardlockup_probe(void) { /* @@ -957,6 +963,62 @@ static void __init watchdog_sysctl_init(void) #define watchdog_sysctl_init() do { } while (0) #endif /* CONFIG_SYSCTL */ +static void __init lockup_detector_delay_init(struct work_struct *work); +static bool allow_lockup_detector_init_retry __initdata; + +static struct work_struct detector_work __initdata = + __WORK_INITIALIZER(detector_work, lockup_detector_delay_init); + +static void __init lockup_detector_delay_init(struct work_struct *work) +{ + int ret; + + ret = watchdog_hardlockup_probe(); + if (ret) { + pr_info("Delayed init of the lockup detector failed: %d\n", ret); + pr_info("Hard watchdog permanently disabled\n"); + return; + } + + allow_lockup_detector_init_retry = false; + + nmi_watchdog_available = true; + lockup_detector_setup(); +} + +/* + * lockup_detector_retry_init - retry init lockup detector if possible. + * + * Ret
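For a driver that brings its PMU up late, the intended use of the new API is a single call once the PMU is known to work. A minimal sketch (register_my_pmu() is a placeholder, not a real API; the arm64 patch elsewhere in this series does the equivalent from armv8_pmu_driver_init()):

	static int __init my_pmu_driver_init(void)
	{
		int ret = register_my_pmu();	/* hypothetical PMU bring-up */

		if (!ret)
			lockup_detector_retry_init();	/* PMU ready: re-run the probe */

		return ret;
	}
	device_initcall(my_pmu_driver_init);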
[PATCH v4 14/17] watchdog/perf: Add a weak function for an arch to detect if perf can use NMIs
On arm64, NMI support needs to be detected at runtime. Add a weak function to the perf hardlockup detector so that an architecture can implement it to detect whether NMIs are available. Signed-off-by: Douglas Anderson --- While I won't object to this patch landing, I consider it part of the arm64 perf hardlockup effort. I would be OK with the earlier patches in the series landing and then not landing ${SUBJECT} patch nor anything else later. I'll also note that, as an alternative to this, it would be nice if we could figure out how to make perf_event_create_kernel_counter() fail on arm64 if NMIs aren't available. Maybe we could add a "must_use_nmi" element to "struct perf_event_attr"? Changes in v4: - ("Add a weak function for an arch to detect ...") new for v4. include/linux/nmi.h| 1 + kernel/watchdog_perf.c | 12 +++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 90aa33317b4c..9caea5ba494d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -212,6 +212,7 @@ static inline bool trigger_single_cpu_backtrace(int cpu) #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); +bool arch_perf_nmi_is_available(void); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index a55a6eab1b3a..0d1c292a655d 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -234,12 +234,22 @@ void __init hardlockup_detector_perf_restart(void) } } +bool __weak __init arch_perf_nmi_is_available(void) +{ + return true; +} + /** * watchdog_hardlockup_probe - Probe whether NMI event is available at all */ int __init watchdog_hardlockup_probe(void) { - int ret = hardlockup_detector_event_create(); + int ret; + + if (!arch_perf_nmi_is_available()) + return -ENODEV; + + ret = hardlockup_detector_event_create(); if (ret) { pr_info("Perf NMI watchdog permanently disabled\n"); -- 2.40.1.521.gf1e218fcd8-goog
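The mechanism is the usual weak/strong symbol split, with the two definitions living in different files. Condensed (the generic half is from the diff above; the override half is a sketch of what the arm64 patch elsewhere in this series does):

	/* Generic default in kernel/watchdog_perf.c: assume NMIs work. */
	bool __weak __init arch_perf_nmi_is_available(void)
	{
		return true;
	}

	/* Arch override, in a separate arch file; the strong definition
	 * wins at link time. */
	bool __init arch_perf_nmi_is_available(void)
	{
		return arm_pmu_irq_is_nmi();
	}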
[PATCH v4 13/17] watchdog/hardlockup: detect hard lockups using secondary (buddy) CPUs
From: Colin Cross Implement a hardlockup detector that doesn't need any extra arch-specific support code to detect lockups. Instead of using something arch-specific we will use the buddy system, where each CPU watches out for another one. Specifically, each CPU will use its softlockup hrtimer to check that the next CPU is processing hrtimer interrupts by verifying that a counter is increasing. NOTE: unlike the other hard lockup detectors, the buddy one can't easily show what's happening on the CPU that locked up just by doing a simple backtrace. It relies on some other mechanism in the system to get information about the locked up CPUs. This could be support for NMI backtraces like [1], it could be a mechanism for printing the PC of locked CPUs at panic time like [2] / [3], or it could be something else. Even though that means we still rely on arch-specific code, this arch-specific code seems to often be implemented even on architectures that don't have a hardlockup detector. This style of hardlockup detector originated in some downstream Android trees and has been rebased on / carried in ChromeOS trees for quite a long time for use on arm and arm64 boards. Historically on these boards we've leveraged mechanism [2] / [3] to get information about hung CPUs, but we could move to [1]. Although the original motivation for the buddy system was for use on systems without an arch-specific hardlockup detector, it can still be useful to use even on systems that _do_ have an arch-specific hardlockup detector. On x86, for instance, there is a 24-part patch series [4] in progress switching the arch-specific hard lockup detector from a scarce perf counter to a less-scarce hardware resource. Potentially the buddy system could be a simpler alternative to free up the perf counter but still get hard lockup detection. Overall, pros (+) and cons (-) of the buddy system compared to an arch-specific hardlockup detector (which might be implemented using perf): + The buddy system is usable on systems that don't have an arch-specific hardlockup detector, like arm32 and arm64 (though it's being worked on for arm64 [5]). + The buddy system may free up scarce hardware resources. + If a CPU totally goes out to lunch (can't process NMIs) the buddy system could still detect the problem (though it would be unlikely to be able to get a stack trace). + The buddy system uses the same timer function to pet the hardlockup detector on the running CPU as it uses to detect hardlockups on other CPUs. Compared to other hardlockup detectors, this means it generates fewer interrupts and thus is likely better able to let CPUs stay idle longer. - If all CPUs are hard locked up at the same time the buddy system can't detect it. - If we don't have SMP we can't use the buddy system. - The buddy system needs an arch-specific mechanism (possibly NMI backtrace) to get info about the locked up CPU.
[1] https://lore.kernel.org/r/20230419225604.21204-1-diand...@chromium.org [2] https://issuetracker.google.com/172213129 [3] https://docs.kernel.org/trace/coresight/coresight-cpu-debug.html [4] https://lore.kernel.org/lkml/20230301234753.28582-1-ricardo.neri-calde...@linux.intel.com/ [5] https://lore.kernel.org/linux-arm-kernel/20220903093415.15850-1-lecopzer.c...@mediatek.com/ Signed-off-by: Colin Cross Signed-off-by: Matthias Kaehlcke Signed-off-by: Guenter Roeck Signed-off-by: Tzung-Bi Shih Signed-off-by: Douglas Anderson --- This patch has been rebased in ChromeOS kernel trees many times, and each time someone had to do work on it they added their Signed-off-by. I've included those here. I've also left the author as Colin Cross since the core code is still his, even if it's now been reorganized a lot. Changes in v4: - Reworked atop a whole pile of cleanups, as suggested by Petr. Changes in v3: - Added a note in commit message about the effect on idle. - Cleaned up commit message pros/cons to be complete sentences. - More cpu => CPU (in Kconfig and comments). - No code changes other than comments. Changes in v2: - No code changes. - Reworked description and Kconfig based on v1 discussion. - cpu => CPU (in commit message). include/linux/nmi.h | 9 +++- kernel/Makefile | 1 + kernel/watchdog.c | 29 + kernel/watchdog_buddy.c | 93 + lib/Kconfig.debug | 52 +-- 5 files changed, 173 insertions(+), 11 deletions(-) create mode 100644 kernel/watchdog_buddy.c diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 094a0e7ed97d..90aa33317b4c 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -87,8 +87,9 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif -#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +#if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER) void arch_touch
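The heart of the buddy scheme fits in a few lines. A condensed sketch (the helper names and shapes are mine, modeled on the new kernel/watchdog_buddy.c in the diffstat above; SMP is assumed, per the cons list):

	/* Each CPU watches the next online CPU, wrapping around, so the
	 * watch relation forms a ring over cpu_online_mask. */
	static unsigned int watchdog_next_cpu(unsigned int cpu)
	{
		unsigned int next = cpumask_next(cpu, cpu_online_mask);

		if (next >= nr_cpu_ids)
			next = cpumask_first(cpu_online_mask);

		return next;
	}

	/* Called from this CPU's softlockup hrtimer: while proving our own
	 * liveness, also check that our buddy's interrupt count moved. */
	static void watchdog_buddy_check_hardlockup(void)
	{
		watchdog_hardlockup_check(watchdog_next_cpu(smp_processor_id()), NULL);
	}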
[PATCH v4 12/17] watchdog/hardlockup: Have the perf hardlockup use __weak functions more cleanly
The fact that watchdog_hardlockup_enable(), watchdog_hardlockup_disable(), and watchdog_hardlockup_probe() are declared __weak means that the configured hardlockup detector can define non-weak versions of those functions if it needs to. Instead of doing this, the perf hardlockup detector hooked itself into the default __weak implementation, which was a bit awkward. Clean this up. From comments, it looks as if the original design was done because the __weak functions were expected to be implemented by the architecture and not by the configured hardlockup detector. This got awkward when we tried to add the buddy lockup detector which was not arch-specific but wanted to hook into those same functions. This is not expected to have any functional impact. Signed-off-by: Douglas Anderson --- Changes in v4: - ("Have the perf hardlockup use __weak ...") new for v4. include/linux/nmi.h| 10 -- kernel/watchdog.c | 30 ++ kernel/watchdog_perf.c | 20 ++-- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4ff48f189ab1..094a0e7ed97d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -103,21 +103,11 @@ static inline void arch_touch_nmi_watchdog(void) { } #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); -extern void hardlockup_detector_perf_disable(void); -extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); -extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } -static inline void hardlockup_detector_perf_disable(void) { } -static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } -# if !defined(CONFIG_HAVE_NMI_WATCHDOG) -static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } -# else -static inline int hardlockup_detector_perf_init(void) { return 0; } -# endif #endif void watchdog_hardlockup_stop(void); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8e11b2b69e2c..e21896a0a9d5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -190,27 +190,33 @@ static inline void watchdog_hardlockup_interrupt_count(void) { } #endif /* !CONFIG_HARDLOCKUP_DETECTOR_PERF */ /* - * These functions can be overridden if an architecture implements its - * own hardlockup detector. + * These functions can be overridden based on the configured hardlockup detector. * * watchdog_hardlockup_enable/disable can be implemented to start and stop when - * softlockup watchdog start and stop. The arch must select the + * softlockup watchdog start and stop. The detector must select the * SOFTLOCKUP_DETECTOR Kconfig. */ -void __weak watchdog_hardlockup_enable(unsigned int cpu) -{ - hardlockup_detector_perf_enable(); -} +void __weak watchdog_hardlockup_enable(unsigned int cpu) { } -void __weak watchdog_hardlockup_disable(unsigned int cpu) -{ - hardlockup_detector_perf_disable(); -} +void __weak watchdog_hardlockup_disable(unsigned int cpu) { } /* Return 0, if a hardlockup watchdog is available. Error code otherwise */ int __weak __init watchdog_hardlockup_probe(void) { - return hardlockup_detector_perf_init(); + /* +* If CONFIG_HAVE_NMI_WATCHDOG is defined then an architecture +* is assumed to have the hard watchdog available and we return 0.
+*/ + if (IS_ENABLED(CONFIG_HAVE_NMI_WATCHDOG)) + return 0; + + /* +* Hardlockup detectors other than those using CONFIG_HAVE_NMI_WATCHDOG +* are required to implement a non-weak version of this probe function +* to tell whether they are available. If they don't override then +* we'll return -ENODEV. +*/ + return -ENODEV; } /** diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index aadc52b79f5b..a55a6eab1b3a 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -132,10 +132,14 @@ static int hardlockup_detector_event_create(void) } /** - * hardlockup_detector_perf_enable - Enable the local event + * watchdog_hardlockup_enable - Enable the local event + * + * @cpu: The CPU to enable hard lockup detection on. */ -void hardlockup_detector_perf_enable(void) +void watchdog_hardlockup_enable(unsigned int cpu) { + WARN_ON_ONCE(cpu != smp_processor_id()); + if (hardlockup_detector_event_create()) return; @@ -147,12 +151,16 @@ void hardlockup_detector_perf_enable(void) } /** - * hardlockup_detector_perf_disable - Disable the local event + * watchdog_hardlockup_disable - Disable the local event + * + * @cpu: The CPU to disable hard lockup detection on. */ -void hardlockup_detector_p
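The override mechanism this patch relies on is plain weak linkage: a strong definition anywhere in the build silently replaces the __weak default. A minimal userspace sketch of the same pattern follows (GCC's weak attribute, which is what the kernel's __weak macro expands to; the file names and the -19 return value are just for illustration):

/* core.c -- provides the weak default. */
#include <stdio.h>

__attribute__((weak)) int watchdog_hardlockup_probe(void)
{
	return -19;	/* mimic -ENODEV: no detector available */
}

int main(void)
{
	printf("probe() = %d\n", watchdog_hardlockup_probe());
	return 0;
}

/*
 * detector.c -- an optional strong override; if linked in, it wins:
 *
 *     int watchdog_hardlockup_probe(void) { return 0; }
 *
 * $ gcc core.c && ./a.out              => probe() = -19
 * $ gcc core.c detector.c && ./a.out   => probe() = 0
 */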
[PATCH v4 11/17] watchdog/hardlockup: Rename some "NMI watchdog" constants/functions
Do a search and replace of: - NMI_WATCHDOG_ENABLED => HARD_WATCHDOG_ENABLED - watchdog_nmi_ => watchdog_hardlockup_ Then update a few comments near where names were changed. This is specifically to make it less confusing when we want to introduce the buddy hardlockup detector, which isn't using NMIs. Signed-off-by: Douglas Anderson --- Changes in v4: - ("Rename some "NMI watchdog" constants/function ...") new for v4. arch/powerpc/include/asm/nmi.h| 4 +- arch/powerpc/kernel/watchdog.c| 12 ++--- arch/powerpc/platforms/pseries/mobility.c | 4 +- arch/sparc/kernel/nmi.c | 4 +- include/linux/nmi.h | 14 +++--- kernel/watchdog.c | 60 +++ kernel/watchdog_perf.c| 2 +- 7 files changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index c3c7adef74de..43bfd4de868f 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -5,10 +5,10 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); long soft_nmi_interrupt(struct pt_regs *regs); -void watchdog_nmi_set_timeout_pct(u64 pct); +void watchdog_hardlockup_set_timeout_pct(u64 pct); #else static inline void arch_touch_nmi_watchdog(void) {} -static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} +static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {} #endif #ifdef CONFIG_NMI_IPI diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dbcc4a793f0b..27d1f0dba5b3 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -438,7 +438,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & HARD_WATCHDOG_ENABLED)) return HRTIMER_NORESTART; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -479,7 +479,7 @@ static void start_watchdog(void *arg) return; } - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & HARD_WATCHDOG_ENABLED)) return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -546,7 +546,7 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_stop(void) +void watchdog_hardlockup_stop(void) { int cpu; @@ -554,7 +554,7 @@ void watchdog_nmi_stop(void) stop_watchdog_on_cpu(cpu); } -void watchdog_nmi_start(void) +void watchdog_hardlockup_start(void) { int cpu; @@ -566,7 +566,7 @@ void watchdog_nmi_start(void) /* * Invoked from core watchdog init. 
*/ -int __init watchdog_nmi_probe(void) +int __init watchdog_hardlockup_probe(void) { int err; @@ -582,7 +582,7 @@ int __init watchdog_nmi_probe(void) } #ifdef CONFIG_PPC_PSERIES -void watchdog_nmi_set_timeout_pct(u64 pct) +void watchdog_hardlockup_set_timeout_pct(u64 pct) { pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); WRITE_ONCE(wd_timeout_pct, pct); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 643d309d1bd0..9475388235a3 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -758,7 +758,7 @@ static int pseries_migrate_partition(u64 handle) goto out; if (factor) - watchdog_nmi_set_timeout_pct(factor); + watchdog_hardlockup_set_timeout_pct(factor); ret = pseries_suspend(handle); if (ret == 0) { @@ -774,7 +774,7 @@ static int pseries_migrate_partition(u64 handle) pseries_cancel_migration(handle, ret); if (factor) - watchdog_nmi_set_timeout_pct(0); + watchdog_hardlockup_set_timeout_pct(0); out: vas_migration_handler(VAS_RESUME); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 5dcf31f7e81f..9d9e29b75c43 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -282,7 +282,7 @@ __setup("nmi_watchdog=", setup_nmi_watchdog); * sparc specific NMI watchdog enable function. * Enables watchdog if it is not enabled already. */ -void watchdog_nmi_enable(unsigned int cpu) +void watchdog_hardlockup_enable(unsigned int cpu) { if (atomic_read(&nmi_active) == -1) { pr_warn("NMI watchdog cannot be enabled or disabled\n"); @@ -303,7 +303,7 @@ void watchdog_nmi_enable(unsigned int cpu) * sparc specific NMI watchdog disable function. * Disables watchdog if it is not disabled already. */ -void watchdog_nmi_disable(unsigned int cpu) +void watchdog_hardlockup_disable(unsigned int cpu) {
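The renamed HARD_WATCHDOG_ENABLED is still just one bit in the watchdog_enabled bitmask, tested at the top of the start and timer paths shown above. A tiny standalone model of that check (the flag values mirror the kernel's; the rest is illustrative only):

#include <stdio.h>

#define SOFT_WATCHDOG_ENABLED	(1 << 0)
#define HARD_WATCHDOG_ENABLED	(1 << 1)	/* was NMI_WATCHDOG_ENABLED */

static unsigned long watchdog_enabled =
	SOFT_WATCHDOG_ENABLED | HARD_WATCHDOG_ENABLED;

int main(void)
{
	/* Mirrors the early-out at the top of watchdog_timer_fn(). */
	if (!(watchdog_enabled & HARD_WATCHDOG_ENABLED))
		puts("hardlockup detector off: timer fn bails out");
	else
		puts("hardlockup detector armed");
	return 0;
}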
[PATCH v4 10/17] watchdog/hardlockup: Move perf hardlockup watchdog petting to watchdog.c
In preparation for the buddy hardlockup detector, which wants the same petting logic as the current perf hardlockup detector, move the code to watchdog.c. While doing this, rename the global variable to match others nearby. The arch_touch_nmi_watchdog() function is not renamed since that is exported with "EXPORT_SYMBOL" and is thus ABI. Currently the code in watchdog.c is guarded by CONFIG_HARDLOCKUP_DETECTOR_PERF, which makes this change seem silly. However, a future patch will change this. NOTE: there is a slight side effect to this change, though from code analysis I believe it to be a beneficial one. Specifically, the perf hardlockup detector will now check the "timestamp" before clearing any watchdog pets. Previously the order was reversed. With the old order if the watchdog perf event was firing super fast then it would also be clearing existing watchdog pets super fast. The new behavior of handling super-fast perf before clearing watchdog pets seems better. Signed-off-by: Douglas Anderson --- Changes in v4: - ("Move perf hardlockup watchdog petting to watchdog.c") new for v4. include/linux/nmi.h| 5 +++-- kernel/watchdog.c | 19 +++ kernel/watchdog_perf.c | 19 --- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 2c9ea1ba285c..94ff84e1c8ec 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -88,7 +88,10 @@ static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +void arch_touch_nmi_watchdog(void); void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); +#elif !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline void arch_touch_nmi_watchdog(void) { } #endif #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) @@ -98,7 +101,6 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) -extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); @@ -113,7 +115,6 @@ static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } -static inline void arch_touch_nmi_watchdog(void) {} # else static inline int hardlockup_detector_perf_init(void) { return 0; } # endif diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 367bea0167a5..9c17090611f2 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -90,8 +90,22 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(bool, watchdog_hardlockup_processed); +static DEFINE_PER_CPU(bool, watchdog_hardlockup_touch); static unsigned long watchdog_hardlockup_dumped_stacks; +notrace void arch_touch_nmi_watchdog(void) +{ + /* +* Using __raw here because some code paths have +* preemption enabled. If preemption is enabled +* then interrupts should be enabled too, in which +* case we shouldn't have to worry about the watchdog +* going off.
+*/ + raw_cpu_write(watchdog_hardlockup_touch, true); +} +EXPORT_SYMBOL(arch_touch_nmi_watchdog); + static bool watchdog_hardlockup_is_lockedup(unsigned int cpu) { unsigned long hrint = per_cpu(hrtimer_interrupts, cpu); @@ -111,6 +125,11 @@ static void watchdog_hardlockup_interrupt_count(void) void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) { + if (__this_cpu_read(watchdog_hardlockup_touch)) { + __this_cpu_write(watchdog_hardlockup_touch, false); + return; + } + /* * Check for a hardlockup by making sure the CPU's timer * interrupt is incrementing. The timer interrupt should have diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index 9be90b2a2ea7..547917ebd5d3 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -20,26 +20,12 @@ #include #include -static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); static DEFINE_PER_CPU(struct perf_event *, dead_event); static struct cpumask dead_events_mask; static atomic_t watchdog_cpus = ATOMIC_INIT(0); -notrace void arch_touch_nmi_watchdog(void) -{ - /* -* Using __raw here because some code paths have -* preemption enabled. If preemption is enabled -* then interrupts should be enabled too, in which -* case we shouldn't have to worry about the watchdog -* going off. -*/ - raw
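A compact userspace model of the petting logic being moved, including the reordering the NOTE above describes: the check consumes a pending pet before it ever looks at the interrupt counts, so a perf event firing super fast can no longer burn through pets. Per-CPU data is simulated with plain arrays and the count comparison is stubbed out; this is an illustration, not the kernel code:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static bool watchdog_hardlockup_touch[NR_CPUS];

/* Stand-in for the hrtimer-count comparison (see patch 07/17). */
static bool counts_stalled(int cpu) { (void)cpu; return true; }

/* Petting: anyone legitimately holding off interrupts calls this. */
static void arch_touch_nmi_watchdog_model(int cpu)
{
	watchdog_hardlockup_touch[cpu] = true;
}

/* New ordering from the patch: consume a pending pet *before*
 * looking at the interrupt counts. */
static void watchdog_hardlockup_check_model(int cpu)
{
	if (watchdog_hardlockup_touch[cpu]) {
		watchdog_hardlockup_touch[cpu] = false;
		return;			/* petted: not locked up */
	}
	if (counts_stalled(cpu))
		printf("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
}

int main(void)
{
	arch_touch_nmi_watchdog_model(1);
	watchdog_hardlockup_check_model(0);	/* fires */
	watchdog_hardlockup_check_model(1);	/* petted: silent */
	return 0;
}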
[PATCH v4 09/17] watchdog/hardlockup: Add a "cpu" param to watchdog_hardlockup_check()
In preparation for the buddy hardlockup detector where the CPU checking for lockup might not be the currently running CPU, add a "cpu" parameter to watchdog_hardlockup_check(). Signed-off-by: Douglas Anderson --- Changes in v4: - ("Add a "cpu" param to watchdog_hardlockup_check()") new for v4. include/linux/nmi.h| 2 +- kernel/watchdog.c | 47 -- kernel/watchdog_perf.c | 2 +- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index c6cb9bc5dc80..2c9ea1ba285c 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -88,7 +88,7 @@ static inline void hardlockup_detector_disable(void) {} #endif #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) -void watchdog_hardlockup_check(struct pt_regs *regs); +void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); #endif #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f46669c1671d..367bea0167a5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -92,14 +92,14 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(bool, watchdog_hardlockup_processed); static unsigned long watchdog_hardlockup_dumped_stacks; -static bool watchdog_hardlockup_is_lockedup(void) +static bool watchdog_hardlockup_is_lockedup(unsigned int cpu) { - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); + unsigned long hrint = per_cpu(hrtimer_interrupts, cpu); - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint) return true; - __this_cpu_write(hrtimer_interrupts_saved, hrint); + per_cpu(hrtimer_interrupts_saved, cpu) = hrint; return false; } @@ -109,7 +109,7 @@ static void watchdog_hardlockup_interrupt_count(void) __this_cpu_inc(hrtimer_interrupts); } -void watchdog_hardlockup_check(struct pt_regs *regs) +void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) { /* * Check for a hardlockup by making sure the CPU's timer @@ -117,35 +117,50 @@ void watchdog_hardlockup_check(struct pt_regs *regs) * fired multiple times before we overflow'd. If it hasn't * then this is a good indication the cpu is stuck */ - if (watchdog_hardlockup_is_lockedup()) { + if (watchdog_hardlockup_is_lockedup(cpu)) { unsigned int this_cpu = smp_processor_id(); + struct cpumask backtrace_mask = *cpu_online_mask; /* Only handle hardlockups once. */ - if (__this_cpu_read(watchdog_hardlockup_processed)) + if (per_cpu(watchdog_hardlockup_processed, cpu)) return; - pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu); print_modules(); print_irqtrace_events(current); - if (regs) + if (regs) { show_regs(regs); - else - dump_stack(); + cpumask_clear_cpu(cpu, &backtrace_mask); + } else { + /* +* If the locked up CPU is different than the CPU we're +* running on then we'll try to backtrace the CPU that +* locked up and then exclude it from later backtraces. +* If that fails or if we're running on the locked up +* CPU, just do a normal backtrace. 
+*/ + if (cpu != this_cpu && trigger_single_cpu_backtrace(cpu)) { + cpumask_clear_cpu(cpu, &backtrace_mask); + } else { + dump_stack(); + cpumask_clear_cpu(this_cpu, &backtrace_mask); + } + } /* -* Perform all-CPU dump only once to avoid multiple hardlockups -* generating interleaving traces +* Perform multi-CPU dump only once to avoid multiple +* hardlockups generating interleaving traces */ if (sysctl_hardlockup_all_cpu_backtrace && !test_and_set_bit(0, &watchdog_hardlockup_dumped_stacks)) - trigger_allbutself_cpu_backtrace(); + trigger_cpumask_backtrace(&backtrace_mask); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); - __this_cpu_write(watchdog_hardlockup_processed
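The new mask handling can be summarized as: start from all online CPUs, get a trace of the locked-up CPU by the cheapest means available, then exclude whichever CPU was already traced from the later multi-CPU dump. A toy model with cpumask_t reduced to a plain bitmask and a stubbed trigger_single_cpu_backtrace(); illustrative only:

#include <stdbool.h>
#include <stdio.h>

/* Stub: pretend the arch supports single-CPU NMI backtraces. */
static bool trigger_single_cpu_backtrace(int cpu)
{
	printf("backtrace of cpu %d via NMI\n", cpu);
	return true;
}

static unsigned int pick_backtrace_mask(unsigned int online_mask,
					int locked_cpu, int this_cpu,
					bool have_regs)
{
	unsigned int mask = online_mask;

	if (have_regs) {
		/* regs already show the locked CPU: exclude it */
		mask &= ~(1u << locked_cpu);
	} else if (locked_cpu != this_cpu &&
		   trigger_single_cpu_backtrace(locked_cpu)) {
		/* traced the remote locked CPU: exclude it */
		mask &= ~(1u << locked_cpu);
	} else {
		/* fall back: dump our own stack, exclude ourselves */
		printf("dump_stack() on cpu %d\n", this_cpu);
		mask &= ~(1u << this_cpu);
	}
	return mask;
}

int main(void)
{
	/* 4 CPUs online, CPU 2 locked up, we are running on CPU 0. */
	printf("mask = 0x%x\n", pick_backtrace_mask(0xf, 2, 0, false));
	return 0;
}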
[PATCH v4 08/17] watchdog/hardlockup: Style changes to watchdog_hardlockup_check() / ..._is_lockedup()
These are tiny style changes: - Add a blank line before a "return". - Rename two globals to use the "watchdog_hardlockup" prefix. - Store processor id in "unsigned int" rather than "int". - Minor comment rewording. - Use "else" rather than extra returns since it seemed more symmetric. Signed-off-by: Douglas Anderson --- Changes in v4: - ("Style changes to watchdog_hardlockup_check ...") new for v4. kernel/watchdog.c | 32 +++- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 2d319cdf64b9..f46669c1671d 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -89,8 +89,8 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); -static DEFINE_PER_CPU(bool, hard_watchdog_warn); -static unsigned long hardlockup_allcpu_dumped; +static DEFINE_PER_CPU(bool, watchdog_hardlockup_processed); +static unsigned long watchdog_hardlockup_dumped_stacks; static bool watchdog_hardlockup_is_lockedup(void) { @@ -100,6 +100,7 @@ static bool watchdog_hardlockup_is_lockedup(void) return true; __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; } @@ -110,21 +111,20 @@ static void watchdog_hardlockup_interrupt_count(void) void watchdog_hardlockup_check(struct pt_regs *regs) { - /* check for a hardlockup -* This is done by making sure our timer interrupt -* is incrementing. The timer interrupt should have -* fired multiple times before we overflow'd. If it hasn't + /* +* Check for a hardlockup by making sure the CPU's timer +* interrupt is incrementing. The timer interrupt should have +* fired multiple times before we overflow'd. If it hasn't * then this is a good indication the cpu is stuck */ if (watchdog_hardlockup_is_lockedup()) { - int this_cpu = smp_processor_id(); + unsigned int this_cpu = smp_processor_id(); - /* only print hardlockups once */ - if (__this_cpu_read(hard_watchdog_warn) == true) + /* Only handle hardlockups once. */ + if (__this_cpu_read(watchdog_hardlockup_processed)) return; - pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", -this_cpu); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); print_modules(); print_irqtrace_events(current); if (regs) @@ -137,18 +137,16 @@ void watchdog_hardlockup_check(struct pt_regs *regs) * generating interleaving traces */ if (sysctl_hardlockup_all_cpu_backtrace && - !test_and_set_bit(0, &hardlockup_allcpu_dumped)) + !test_and_set_bit(0, &watchdog_hardlockup_dumped_stacks)) trigger_allbutself_cpu_backtrace(); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); - __this_cpu_write(hard_watchdog_warn, true); - return; + __this_cpu_write(watchdog_hardlockup_processed, true); + } else { + __this_cpu_write(watchdog_hardlockup_processed, false); } - - __this_cpu_write(hard_watchdog_warn, false); - return; } #else /* CONFIG_HARDLOCKUP_DETECTOR_PERF */ -- 2.40.1.521.gf1e218fcd8-goog
[PATCH v4 07/17] watchdog/hardlockup: Move perf hardlockup checking/panic to common watchdog.c
The perf hardlockup detector works by looking at interrupt counts and seeing if they change from run to run. The interrupt counts are managed by the common watchdog code via its watchdog_timer_fn(). Currently the API between the perf detector and the common code is a function: is_hardlockup(). When the hard lockup detector sees that function return true, it handles printing out debug info and inducing a panic if necessary. Let's change the API a little bit in preparation for the buddy hardlockup detector. The buddy hardlockup detector wants to print nearly the same debug info and have nearly the same panic behavior. That means we want to move all that code to the common file. For now, the code in the common file will only be there if the perf hardlockup detector is enabled, but eventually it will be selected by a common config. Right now, this _just_ moves the code from the perf detector file to the common file and changes the names. It doesn't make the changes that the buddy hardlockup detector will need and doesn't do any style cleanups. A future patch will do cleanup to make it more obvious what changed. With the above, we no longer have any callers of is_hardlockup() outside of the "watchdog.c" file, so we can remove it from the header, make it static, move it to the same "#ifdef" block as our new watchdog_hardlockup_check(), and rename it to make it obvious it's just for hardlockup detectors. While doing this, it can be noted that the existing code still counted/checked "hrtimer_interrupts" even when no hardlockup detector was configured. We didn't need to do that, so move all the "hrtimer_interrupts" counting to only be there if the perf hardlockup detector is configured as well. This change is expected to be a no-op. Signed-off-by: Douglas Anderson --- Changes in v4: - ("Move perf hardlockup checking/panic ...") new for v4.
include/linux/nmi.h| 5 ++- kernel/watchdog.c | 92 +- kernel/watchdog_perf.c | 42 +-- 3 files changed, 78 insertions(+), 61 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 35d09d70f394..c6cb9bc5dc80 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -15,7 +15,6 @@ void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); -bool is_hardlockup(void); extern int watchdog_user_enabled; extern int nmi_watchdog_user_enabled; @@ -88,6 +87,10 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +void watchdog_hardlockup_check(struct pt_regs *regs); +#endif + #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) # define NMI_WATCHDOG_SYSCTL_PERM 0644 #else diff --git a/kernel/watchdog.c b/kernel/watchdog.c index c705a18b26bf..2d319cdf64b9 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -85,6 +85,78 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); #endif /* CONFIG_HARDLOCKUP_DETECTOR */ +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) + +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts); +static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); +static DEFINE_PER_CPU(bool, hard_watchdog_warn); +static unsigned long hardlockup_allcpu_dumped; + +static bool watchdog_hardlockup_is_lockedup(void) +{ + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); + + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + return true; + + __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; +} + +static void watchdog_hardlockup_interrupt_count(void) +{ + __this_cpu_inc(hrtimer_interrupts); +} + +void watchdog_hardlockup_check(struct pt_regs *regs) +{ + /* check for a hardlockup +* This is done by making sure our timer interrupt +* is incrementing. The timer interrupt should have +* fired multiple times before we overflow'd. If it hasn't +* then this is a good indication the cpu is stuck +*/ + if (watchdog_hardlockup_is_lockedup()) { + int this_cpu = smp_processor_id(); + + /* only print hardlockups once */ + if (__this_cpu_read(hard_watchdog_warn) == true) + return; + + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", +this_cpu); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + + /* +* Perform all-CPU dump only once to avoid multiple hardlockups +
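The detection principle that moves to watchdog.c is small enough to model standalone: the hrtimer path increments a counter, and the perf NMI path decides "locked up" when the counter hasn't moved since the previous NMI. A single-CPU sketch for illustration (plain variables stand in for the kernel's per-CPU ones):

#include <stdbool.h>
#include <stdio.h>

static unsigned long hrtimer_interrupts;
static unsigned long hrtimer_interrupts_saved;

/* Called from the (hrtimer-driven) softlockup timer function. */
static void watchdog_hardlockup_interrupt_count(void)
{
	hrtimer_interrupts++;
}

/* Called from the perf NMI: if the timer count hasn't moved since
 * the last NMI, timer interrupts aren't being serviced => lockup. */
static bool watchdog_hardlockup_is_lockedup(void)
{
	if (hrtimer_interrupts_saved == hrtimer_interrupts)
		return true;
	hrtimer_interrupts_saved = hrtimer_interrupts;
	return false;
}

int main(void)
{
	watchdog_hardlockup_interrupt_count();
	printf("%d\n", watchdog_hardlockup_is_lockedup()); /* 0: progressing */
	printf("%d\n", watchdog_hardlockup_is_lockedup()); /* 1: stalled */
	return 0;
}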
[PATCH v4 06/17] watchdog/perf: Rename watchdog_hld.c to watchdog_perf.c
The code currently in "watchdog_hld.c" is for detecting hardlockups using perf, as evidenced by the line in the Makefile that only compiles this file if CONFIG_HARDLOCKUP_DETECTOR_PERF is defined. Rename the file to prepare for the buddy hardlockup detector, which doesn't use perf. It could be argued that the new name makes it less obvious that this is a hardlockup detector. While true, it's not hard to remember that the "perf" detector is always a hardlockup detector and it's nice not to have names that are too convoluted. Signed-off-by: Douglas Anderson --- Changes in v4: - ("Rename watchdog_hld.c to watchdog_perf.c") new for v4. kernel/Makefile| 2 +- kernel/{watchdog_hld.c => watchdog_perf.c} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename kernel/{watchdog_hld.c => watchdog_perf.c} (99%) diff --git a/kernel/Makefile b/kernel/Makefile index 10ef068f598d..406c4dd3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,7 +91,7 @@ obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o -obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_perf.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_perf.c similarity index 99% rename from kernel/watchdog_hld.c rename to kernel/watchdog_perf.c index 96b717205952..c3d8ceb149da 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_perf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Detect hard lockups on a system + * Detect hard lockups on a system using perf * * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. * -- 2.40.1.521.gf1e218fcd8-goog
[PATCH v4 05/17] watchdog/hardlockup: Rename touch_nmi_watchdog() to touch_hardlockup_watchdog()
In preparation for the buddy hardlockup detector, rename touch_nmi_watchdog() to touch_hardlockup_watchdog() to make it clear that it will touch whatever hardlockup detector is configured. We'll add a #define for the old name (touch_nmi_watchdog) so that we don't have to touch every piece of code referring to the old name. Ideally this change would also rename the arch_touch_nmi_watchdog(), but that is harder since arch_touch_nmi_watchdog() is exported with EXPORT_SYMBOL() and thus is ABI. Add a comment next to the call to hopefully alleviate some of the confusion here. Signed-off-by: Douglas Anderson --- Changes in v4: - ("Rename touch_nmi_watchdog() to ...") new for v4. include/linux/nmi.h | 27 ++- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 454fe99c4874..35d09d70f394 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -125,18 +125,35 @@ void watchdog_nmi_disable(unsigned int cpu); void lockup_detector_reconfigure(void); /** - * touch_nmi_watchdog - restart NMI watchdog timeout. + * touch_hardlockup_watchdog - manually pet the hardlockup watchdog. * - * If the architecture supports the NMI watchdog, touch_nmi_watchdog() - * may be used to reset the timeout - for code which intentionally - * disables interrupts for a long time. This call is stateless. + * If we support detecting hardlockups, touch_hardlockup_watchdog() may be + * used to pet the watchdog (reset the timeout) - for code which + * intentionally disables interrupts for a long time. This call is stateless. */ -static inline void touch_nmi_watchdog(void) +static inline void touch_hardlockup_watchdog(void) { + /* +* Pass on to the hardlockup detector selected via CONFIG_. Note that +* the hardlockup detector may not be arch-specific nor using NMIs, +* but arch_touch_nmi_watchdog() is exported with EXPORT_SYMBOL() and +* is thus ABI. +*/ arch_touch_nmi_watchdog(); + + /* +* Touching the hardlockup detector implicitly pets the +* softlockup detector too +*/ touch_softlockup_watchdog(); } +/* + * It's encouraged for code to refer to the new name, but allow the old + * name as well. + */ +#define touch_nmi_watchdog touch_hardlockup_watchdog + /* * Create trigger_all_cpu_backtrace() out of the arch-provided * base function. Return whether such support was available, -- 2.40.1.521.gf1e218fcd8-goog
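The compatibility trick here is just a #define alias, so old call sites compile unchanged while new code can use the clearer name. A minimal standalone sketch (the puts() body is a stand-in for the real petting logic):

#include <stdio.h>

/* New canonical name. */
static inline void touch_hardlockup_watchdog(void)
{
	puts("watchdog petted");
}

/* Old name kept as an alias so existing callers keep compiling. */
#define touch_nmi_watchdog touch_hardlockup_watchdog

int main(void)
{
	touch_nmi_watchdog();	/* legacy call site, new implementation */
	return 0;
}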
[PATCH v4 04/17] watchdog/perf: Ensure CPU-bound context when creating hardlockup detector event
From: Pingfan Liu hardlockup_detector_event_create() should create perf_event on the current CPU. Preemption cannot be disabled because perf_event_create_kernel_counter() allocates memory. Instead, the CPU locality is achieved by processing the code in a per-CPU bound kthread. Add a check to prevent mistakes when calling the code in another code path. Signed-off-by: Pingfan Liu Co-developed-by: Lecopzer Chen Signed-off-by: Lecopzer Chen Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. This is a nice cleanup and could land together with the rest of my series if that makes sense. I changed the patch prefix to match others in my series. [1] https://lore.kernel.org/r/20220903093415.15850-4-lecopzer.c...@mediatek.com/ Changes in v4: - Pulled ("Ensure CPU-bound context when creating ...") into my series for v4. kernel/watchdog_hld.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b1582c..96b717205952 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -165,10 +165,16 @@ static void watchdog_overflow_callback(struct perf_event *event, static int hardlockup_detector_event_create(void) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu; struct perf_event_attr *wd_attr; struct perf_event *evt; + /* +* Preemption is not disabled because memory will be allocated. +* Ensure CPU-locality by calling this in a per-CPU kthread. +*/ + WARN_ON(!is_percpu_thread()); + cpu = raw_smp_processor_id(); wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); -- 2.40.1.521.gf1e218fcd8-goog
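A userspace analogy of the invariant that WARN_ON(!is_percpu_thread()) asserts, assuming Linux with sched_setaffinity()/sched_getcpu(): caching "the current CPU" is only meaningful if the caller cannot migrate. A per-CPU kthread gets that guarantee from the scheduler; here we fake it by pinning the thread ourselves:

#define _GNU_SOURCE
#include <sched.h>
#include <assert.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);
	if (sched_setaffinity(0, sizeof(set), &set))
		return 1;

	/* Now stable: this thread cannot migrate off CPU 0, so the
	 * value read here stays valid for the rest of the function. */
	int cpu = sched_getcpu();
	assert(cpu == 0);
	printf("bound to cpu %d\n", cpu);
	return 0;
}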
[PATCH v4 03/17] watchdog/hardlockup: change watchdog_nmi_enable() to void
From: Lecopzer Chen Nobody cares about the return value of watchdog_nmi_enable(), so change its prototype to void. Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. This is a nice cleanup and could land together with the rest of my series if that makes sense. I changed the patch prefix to match others in my series. [1] https://lore.kernel.org/r/20220903093415.15850-3-lecopzer.c...@mediatek.com/ Changes in v4: - Pulled ("change watchdog_nmi_enable() to void") into my series for v4. arch/sparc/kernel/nmi.c | 8 +++- include/linux/nmi.h | 2 +- kernel/watchdog.c | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a305..5dcf31f7e81f 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -282,11 +282,11 @@ __setup("nmi_watchdog=", setup_nmi_watchdog); * sparc specific NMI watchdog enable function. * Enables watchdog if it is not enabled already. */ -int watchdog_nmi_enable(unsigned int cpu) +void watchdog_nmi_enable(unsigned int cpu) { if (atomic_read(&nmi_active) == -1) { pr_warn("NMI watchdog cannot be enabled or disabled\n"); - return -1; + return; } /* @@ -295,11 +295,9 @@ int watchdog_nmi_enable(unsigned int cpu) * process first. */ if (!nmi_init_done) - return 0; + return; smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); - - return 0; } /* * sparc specific NMI watchdog disable function. diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 771d77b62bc1..454fe99c4874 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -119,7 +119,7 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); int watchdog_nmi_probe(void); -int watchdog_nmi_enable(unsigned int cpu); +void watchdog_nmi_enable(unsigned int cpu); void watchdog_nmi_disable(unsigned int cpu); void lockup_detector_reconfigure(void); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 582d572e1379..c705a18b26bf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -93,10 +93,9 @@ __setup("nmi_watchdog=", hardlockup_panic_setup); * softlockup watchdog start and stop. The arch must select the * SOFTLOCKUP_DETECTOR Kconfig. */ -int __weak watchdog_nmi_enable(unsigned int cpu) +void __weak watchdog_nmi_enable(unsigned int cpu) { hardlockup_detector_perf_enable(); - return 0; } void __weak watchdog_nmi_disable(unsigned int cpu) -- 2.40.1.521.gf1e218fcd8-goog
[PATCH v4 02/17] watchdog: remove WATCHDOG_DEFAULT
From: Lecopzer Chen No reference to WATCHDOG_DEFAULT, remove it. Signed-off-by: Pingfan Liu Signed-off-by: Lecopzer Chen Reviewed-by: Petr Mladek Signed-off-by: Douglas Anderson --- I yanked this patch from the mailing lists [1] into my series just to make it easier to avoid conflicts between my series and the one adding the arm64 perf hardlockup detector, in case someone wanted to test them both together. This is a nice cleanup and could land together with the rest of my series if that makes sense. I changed the patch prefix to match others in my series. [1] https://lore.kernel.org/r/20220903093415.15850-2-lecopzer.c...@mediatek.com/ Changes in v4: - Pulled ("remove WATCHDOG_DEFAULT") into my series for v4. kernel/watchdog.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8e61f21e7e33..582d572e1379 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -30,10 +30,8 @@ static DEFINE_MUTEX(watchdog_mutex); #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) -# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED) # define NMI_WATCHDOG_DEFAULT 1 #else -# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED) # define NMI_WATCHDOG_DEFAULT 0 #endif -- 2.40.1.521.gf1e218fcd8-goog
[PATCH v4 01/17] watchdog/perf: Define dummy watchdog_update_hrtimer_threshold() on correct config
The real watchdog_update_hrtimer_threshold() is defined in watchdog_hardlockup_perf.c. That file is included if CONFIG_HARDLOCKUP_DETECTOR_PERF and the function is defined in that file if CONFIG_HARDLOCKUP_CHECK_TIMESTAMP. The dummy version of the function in "nmi.h" didn't get that quite right. While this doesn't appear to be a huge deal, it's nice to make it consistent. Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") Signed-off-by: Douglas Anderson --- Changes in v4: - ("Define dummy watchdog_update_hrtimer_threshold() ...") new for v4. include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 048c0b9aa623..771d77b62bc1 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -197,7 +197,7 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ -defined(CONFIG_HARDLOCKUP_DETECTOR) +defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) void watchdog_update_hrtimer_threshold(u64 period); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } -- 2.40.1.521.gf1e218fcd8-goog
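The bug class being fixed is a stub guarded by a different condition than the real definition. A self-contained sketch of the corrected pattern (the CONFIG_ macros are defined by hand here purely for demonstration; in the kernel they come from Kconfig):

#include <stdio.h>

#define CONFIG_HARDLOCKUP_CHECK_TIMESTAMP 1
/* #define CONFIG_HARDLOCKUP_DETECTOR_PERF 1 */

/* The stub must key off the *same* pair of options that gates the
 * real definition, or a config that builds the real function also
 * gets the stub (or gets neither).  Mirrors the nmi.h fix above. */
#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
    defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
void watchdog_update_hrtimer_threshold(unsigned long long period);
#else
static inline void watchdog_update_hrtimer_threshold(unsigned long long period)
{
	(void)period;	/* dummy: timestamp checking not built in */
}
#endif

int main(void)
{
	watchdog_update_hrtimer_threshold(1000);
	puts("stub used: only one of the two options is set");
	return 0;
}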
[PATCH v4 00/17] watchdog/hardlockup: Add the buddy hardlockup detector
ge pros/cons to be complete sentences. - More cpu => CPU (in Kconfig and comments). - No code changes other than comments. Changes in v2: - No code changes. - Reworked description and Kconfig based on v1 discussion. - cpu => CPU (in commit message). Colin Cross (1): watchdog/hardlockup: detect hard lockups using secondary (buddy) CPUs Douglas Anderson (11): watchdog/perf: Define dummy watchdog_update_hrtimer_threshold() on correct config watchdog/hardlockup: Rename touch_nmi_watchdog() to touch_hardlockup_watchdog() watchdog/perf: Rename watchdog_hld.c to watchdog_perf.c watchdog/hardlockup: Move perf hardlockup checking/panic to common watchdog.c watchdog/hardlockup: Style changes to watchdog_hardlockup_check() / ..._is_lockedup() watchdog/hardlockup: Add a "cpu" param to watchdog_hardlockup_check() watchdog/hardlockup: Move perf hardlockup watchdog petting to watchdog.c watchdog/hardlockup: Rename some "NMI watchdog" constants/function watchdog/hardlockup: Have the perf hardlockup use __weak functions more cleanly watchdog/perf: Add a weak function for an arch to detect if perf can use NMIs arm64: Enable perf events based hard lockup detector Lecopzer Chen (4): watchdog: remove WATCHDOG_DEFAULT watchdog/hardlockup: change watchdog_nmi_enable() to void watchdog/perf: Adapt the watchdog_perf interface for async model arm64: add hw_nmi_get_sample_period for preparation of lockup detector Pingfan Liu (1): watchdog/perf: Ensure CPU-bound context when creating hardlockup detector event arch/arm64/Kconfig | 2 + arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/perf_event.c | 12 +- arch/arm64/kernel/watchdog_hld.c | 36 +++ arch/powerpc/include/asm/nmi.h | 4 +- arch/powerpc/kernel/watchdog.c | 12 +- arch/powerpc/platforms/pseries/mobility.c | 4 +- arch/sparc/kernel/nmi.c| 10 +- drivers/perf/arm_pmu.c | 5 + include/linux/nmi.h| 73 +++-- include/linux/perf/arm_pmu.h | 2 + kernel/Makefile| 3 +- kernel/watchdog.c | 293 - kernel/watchdog_buddy.c| 93 +++ kernel/{watchdog_hld.c => watchdog_perf.c} | 105 +++- lib/Kconfig.debug | 52 +++- 16 files changed, 527 insertions(+), 180 deletions(-) create mode 100644 arch/arm64/kernel/watchdog_hld.c create mode 100644 kernel/watchdog_buddy.c rename kernel/{watchdog_hld.c => watchdog_perf.c} (72%) -- 2.40.1.521.gf1e218fcd8-goog
[PATCH v8 00/10] arm64: Add framework to turn an IPI as NMI
This is an attempt to resurrect Sumit's old patch series [1] that allowed us to use the arm64 pseudo-NMI to get backtraces of CPUs and also to round up CPUs in kdb/kgdb. The last post from Sumit that I could find was v7, so I called this series v8. I haven't copied all of his old changelogs here, but you can find them from the link. Since v7, I have: * Addressed the small amount of feedback that was there for v7. * Rebased. * Added a new patch that prevents us from spamming the logs with idle tasks. * Added an extra patch to gracefully fall back to regular IPIs if pseudo-NMIs aren't there. Since there appear to be a few different patch series related to being able to use NMIs to get stack traces of crashed systems, let me try to organize them to the best of my understanding: a) This series. On its own, a) will (among other things) enable stack traces of all running processes with the soft lockup detector if you've enabled the sysctl "kernel.softlockup_all_cpu_backtrace". On its own, a) doesn't give a hard lockup detector. b) A different recently-posted series [2] that adds a hard lockup detector based on perf. On its own, b) gives a stack crawl of the locked up CPU but no stack crawls of other CPUs (even if they're locked too). Together with a) + b) we get everything (full lockup detect, full ability to get stack crawls). c) The old Android "buddy" hard lockup detector [3] that I'm considering trying to upstream. If b) lands then I believe c) would be redundant (at least for arm64). c) on its own is really only useful on arm64 for platforms that can print CPU_DBGPCSR somehow (see [4]). a) + c) is roughly as good as a) + b). [1] https://lore.kernel.org/linux-arm-kernel/1604317487-14543-1-git-send-email-sumit.g...@linaro.org/ [2] https://lore.kernel.org/linux-arm-kernel/20220903093415.15850-1-lecopzer.c...@mediatek.com/ [3] https://issuetracker.google.com/172213097 [4] https://issuetracker.google.com/172213129 Changes in v8: - dynamic_ipi_setup() and dynamic_ipi_teardown() no longer take cpu param - dynamic_ipi_setup() and dynamic_ipi_teardown() no longer take cpu param - Add loongarch support, too - Removed "#ifdef CONFIG_SMP" since arm64 is always SMP - "Tag the arm64 idle functions as __cpuidle" new for v8 - "Provide a stub kgdb_nmicallback() if !CONFIG_KGDB" new for v8 - "Fallback to a regular IPI if NMI isn't enabled" new for v8 Douglas Anderson (3): arm64: idle: Tag the arm64 idle functions as __cpuidle kgdb: Provide a stub kgdb_nmicallback() if !CONFIG_KGDB arm64: ipi_nmi: Fallback to a regular IPI if NMI isn't enabled Sumit Garg (7): arm64: Add framework to turn IPI as NMI irqchip/gic-v3: Enable support for SGIs to act as NMIs arm64: smp: Assign and setup an IPI as NMI nmi: backtrace: Allow runtime arch specific override arm64: ipi_nmi: Add support for NMI backtrace kgdb: Expose default CPUs roundup fallback mechanism arm64: kgdb: Roundup cpus using IPI as NMI arch/arm/include/asm/irq.h | 2 +- arch/arm/kernel/smp.c| 3 +- arch/arm64/include/asm/irq.h | 4 ++ arch/arm64/include/asm/nmi.h | 17 + arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/idle.c | 4 +- arch/arm64/kernel/ipi_nmi.c | 103 +++ arch/arm64/kernel/kgdb.c | 18 ++ arch/arm64/kernel/smp.c | 8 +++ arch/loongarch/include/asm/irq.h | 2 +- arch/loongarch/kernel/process.c | 3 +- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/process.c | 3 +- arch/powerpc/include/asm/nmi.h | 2 +- arch/powerpc/kernel/stacktrace.c | 3 +- arch/sparc/include/asm/irq_64.h | 2 +- arch/sparc/kernel/process_64.c | 4 +- arch/x86/include/asm/irq.h
| 2 +- arch/x86/kernel/apic/hw_nmi.c| 3 +- drivers/irqchip/irq-gic-v3.c | 29 ++--- include/linux/kgdb.h | 13 include/linux/nmi.h | 12 ++-- kernel/debug/debug_core.c| 8 ++- 23 files changed, 217 insertions(+), 32 deletions(-) create mode 100644 arch/arm64/include/asm/nmi.h create mode 100644 arch/arm64/kernel/ipi_nmi.c -- 2.40.0.634.g4ca3ef3211-goog
[PATCH v8 04/10] nmi: backtrace: Allow runtime arch specific override
From: Sumit Garg Add a boolean return to arch_trigger_cpumask_backtrace() to support a use-case where a particular architecture detects at runtime whether it supports NMI backtrace or would like to fall back to the default implementation using SMP cross-calls. One example of such an architecture is arm64, whose pseudo-NMI feature is only available on platforms with GICv3 or a later version. Signed-off-by: Sumit Garg Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson --- Changes in v8: - Add loongarch support, too arch/arm/include/asm/irq.h | 2 +- arch/arm/kernel/smp.c| 3 ++- arch/loongarch/include/asm/irq.h | 2 +- arch/loongarch/kernel/process.c | 3 ++- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/process.c | 3 ++- arch/powerpc/include/asm/nmi.h | 2 +- arch/powerpc/kernel/stacktrace.c | 3 ++- arch/sparc/include/asm/irq_64.h | 2 +- arch/sparc/kernel/process_64.c | 4 +++- arch/x86/include/asm/irq.h | 2 +- arch/x86/kernel/apic/hw_nmi.c| 3 ++- include/linux/nmi.h | 12 13 files changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index a7c2337b0c7d..e6b62c7d6f0e 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -32,7 +32,7 @@ void init_IRQ(void); #ifdef CONFIG_SMP #include -extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, +extern bool arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 0b8c25763adc..acb97d9219b1 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -849,7 +849,8 @@ static void raise_nmi(cpumask_t *mask) __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask); } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +bool arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) { nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_nmi); + return true; } diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index a115e8999c69..c7a152d6bf0c 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -40,7 +40,7 @@ void spurious_interrupt(void); #define NR_IRQS_LEGACY 16 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace -void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); +bool arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); #define MAX_IO_PICS 2 #define NR_IRQS(64 + (256 * MAX_IO_PICS)) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index fa2443c7afb2..8f7f818f5c4e 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -339,9 +339,10 @@ static void raise_backtrace(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +bool arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) { nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace); + return true; } #ifdef CONFIG_64BIT diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 44f9824c1d8c..daf16173486a 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -77,7 +77,7 @@ extern int cp0_fdc_irq; extern int get_c0_fdc_int(void); -void arch_trigger_cpumask_backtrace(const struct cpumask *mask, +bool arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool 
exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 093dbbd6b843..7d538571830a 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -750,9 +750,10 @@ static void raise_backtrace(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +bool arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) { nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace); + return true; } int mips_get_process_fp_mode(struct task_struct *task) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index c3c7adef74de..135f65adcf63 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -12,7 +12,7 @@ static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} #endif #ifdef CONFIG_NMI_IPI -extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, +extern bool arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define
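The shape of the new contract: the arch hook reports whether it actually delivered NMI backtraces, and the generic caller falls back to the cross-call path when it didn't. A stub model follows; names ending in _model are invented for the sketch, and the real fallback lives in the generic backtrace/kgdb callers later in this series:

#include <stdbool.h>
#include <stdio.h>

static bool nmi_available;	/* detected at runtime in the real code */

/* Arch hook: returns false when the NMI path isn't usable. */
static bool arch_trigger_cpumask_backtrace_model(unsigned int mask)
{
	if (!nmi_available)
		return false;
	printf("NMI backtrace of mask 0x%x\n", mask);
	return true;
}

/* Generic caller: try NMI first, fall back to a regular IPI. */
static void trigger_backtrace(unsigned int mask)
{
	if (!arch_trigger_cpumask_backtrace_model(mask))
		printf("fallback: IPI backtrace of mask 0x%x\n", mask);
}

int main(void)
{
	trigger_backtrace(0xf);		/* falls back */
	nmi_available = true;
	trigger_backtrace(0xf);		/* uses NMI */
	return 0;
}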
[REPOST PATCH v6 2/4] kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
When I had lockdep turned on and dropped into kgdb I got a nice splat on my system. Specifically it hit: DEBUG_LOCKS_WARN_ON(current->hardirq_context) Specifically it looked like this: sysrq: SysRq : DEBUG [ cut here ] DEBUG_LOCKS_WARN_ON(current->hardirq_context) WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27 pstate: 604003c9 (nZCv DAIF +PAN -UAO) pc : lockdep_hardirqs_on+0xf0/0x160 ... Call trace: lockdep_hardirqs_on+0xf0/0x160 trace_hardirqs_on+0x188/0x1ac kgdb_roundup_cpus+0x14/0x3c kgdb_cpu_enter+0x53c/0x5cc kgdb_handle_exception+0x180/0x1d4 kgdb_compiled_brk_fn+0x30/0x3c brk_handler+0x134/0x178 do_debug_exception+0xfc/0x178 el1_dbg+0x18/0x78 kgdb_breakpoint+0x34/0x58 sysrq_handle_dbg+0x54/0x5c __handle_sysrq+0x114/0x21c handle_sysrq+0x30/0x3c qcom_geni_serial_isr+0x2dc/0x30c ... ... irq event stamp: ...45 hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4 hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130 softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34 softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100 ---[ end trace adf21f830c46e638 ]--- Looking closely at it, it seems like a really bad idea to be calling local_irq_enable() in kgdb_roundup_cpus(). If nothing else that seems like it could violate spinlock semantics and cause a deadlock. Instead, let's use a private csd alongside smp_call_function_single_async() to round up the other CPUs. Using smp_call_function_single_async() doesn't require interrupts to be enabled so we can remove the offending bit of code. In order to avoid duplicating this across all the architectures that use the default kgdb_roundup_cpus(), we'll add a "weak" implementation to debug_core.c. Looking at all the people who previously had copies of this code, there were a few variants. I've attempted to keep the variants working like they used to. Specifically: * For arch/arc we passed NULL to kgdb_nmicallback() instead of get_irq_regs(). * For arch/mips there was a bit of extra code around kgdb_nmicallback() NOTE: In this patch we will still get into trouble if we try to round up a CPU that failed to round up before. We'll try to round it up again and potentially hang when we try to grab the csd lock. That's not new behavior but we'll still try to do better in a future patch. Suggested-by: Daniel Thompson Signed-off-by: Douglas Anderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Richard Kuo Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Acked-by: Will Deacon --- Changes in v6: - Moved smp_call_function_single_async() error check to patch 3. Changes in v5: - Add a comment about get_irq_regs(). - get_cpu() => raw_smp_processor_id() in kgdb_roundup_cpus(). - for_each_cpu() => for_each_online_cpu() - Error check smp_call_function_single_async() Changes in v4: None Changes in v3: - No separate init call. - Don't round up the CPU that is doing the rounding up. - Add "#ifdef CONFIG_SMP" to match the rest of the file. - Updated desc saying we don't solve the "failed to roundup" case. - Document the ignored parameter. Changes in v2: - Removing irq flags separated from fixing lockdep splat. - Don't use smp_call_function (Daniel). 
arch/arc/kernel/kgdb.c | 10 ++ arch/arm/kernel/kgdb.c | 12 --- arch/arm64/kernel/kgdb.c | 12 --- arch/hexagon/kernel/kgdb.c | 27 - arch/mips/kernel/kgdb.c| 9 + arch/powerpc/kernel/kgdb.c | 4 ++-- arch/sh/kernel/kgdb.c | 12 --- include/linux/kgdb.h | 15 -- kernel/debug/debug_core.c | 41 ++ 9 files changed, 59 insertions(+), 83 deletions(-) diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index 0932851028e0..68d9fe4b5aa7 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -192,18 +192,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) instruction_pointer(regs) = ip; } -static void kgdb_call_nmi_hook(void *ignored) +void kgdb_call_nmi_hook(void *ignored) { + /* Default implementation passes get_irq_regs() but we don't */ kgdb_nmicallback(raw_smp_processor_id(), NULL); } -void kgdb_roundup_cpus(void) -{ - local_irq_enable(); - smp_call_function(kgdb_call_nmi_hook, NULL, 0); - local_irq_disable(); -} - struct kgdb_arch arch_kgdb_ops = { /* breakpoint instruction: TRAP_S 0x3 */ #ifde
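The quoted diff is truncated before the new generic code in debug_core.c, so here is a rough kernel-style sketch of what the commit message describes: one private csd per CPU plus smp_call_function_single_async(), skipping the CPU doing the rounding up. Treat this as an approximation of the idea, not the exact upstream code:

/* Sketch only: the default roundup described above for debug_core.c. */
static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd);

void __weak kgdb_call_nmi_hook(void *ignored)
{
	/* Architectures may override this and pass something other
	 * than get_irq_regs() (arch/arc passes NULL, for example). */
	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
}

void __weak kgdb_roundup_cpus(void)
{
	call_single_data_t *csd;
	int this_cpu = raw_smp_processor_id();
	int cpu;

	for_each_online_cpu(cpu) {
		/* No point in rounding up the CPU doing the roundup. */
		if (cpu == this_cpu)
			continue;

		csd = &per_cpu(kgdb_roundup_csd, cpu);
		csd->func = kgdb_call_nmi_hook;
		/* Unlike smp_call_function(), this is legal with
		 * interrupts disabled, which fixes the lockdep splat. */
		smp_call_function_single_async(cpu, csd);
	}
}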
[REPOST PATCH v6 1/4] kgdb: Remove irq flags from roundup
The function kgdb_roundup_cpus() was passed a parameter that was documented as: > the flags that will be used when restoring the interrupts. There is > local_irq_save() call before kgdb_roundup_cpus(). Nobody used those flags. Anyone who wanted to temporarily turn on interrupts just did local_irq_enable() and local_irq_disable() without looking at them. So we can definitely remove the flags. Signed-off-by: Douglas Anderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Richard Kuo Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Acked-by: Will Deacon --- Changes in v6: None Changes in v5: None Changes in v4: None Changes in v3: None Changes in v2: - Removing irq flags separated from fixing lockdep splat. arch/arc/kernel/kgdb.c | 2 +- arch/arm/kernel/kgdb.c | 2 +- arch/arm64/kernel/kgdb.c | 2 +- arch/hexagon/kernel/kgdb.c | 9 ++--- arch/mips/kernel/kgdb.c| 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/sh/kernel/kgdb.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/x86/kernel/kgdb.c | 9 ++--- include/linux/kgdb.h | 9 ++--- kernel/debug/debug_core.c | 2 +- 11 files changed, 14 insertions(+), 29 deletions(-) diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index 9a3c34af2ae8..0932851028e0 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -197,7 +197,7 @@ static void kgdb_call_nmi_hook(void *ignored) kgdb_nmicallback(raw_smp_processor_id(), NULL); } -void kgdb_roundup_cpus(unsigned long flags) +void kgdb_roundup_cpus(void) { local_irq_enable(); smp_call_function(kgdb_call_nmi_hook, NULL, 0); diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index caa0dbe3dc61..f21077b077be 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -175,7 +175,7 @@ static void kgdb_call_nmi_hook(void *ignored) kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); } -void kgdb_roundup_cpus(unsigned long flags) +void kgdb_roundup_cpus(void) { local_irq_enable(); smp_call_function(kgdb_call_nmi_hook, NULL, 0); diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index a20de58061a8..12c339ff6e75 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -289,7 +289,7 @@ static void kgdb_call_nmi_hook(void *ignored) kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); } -void kgdb_roundup_cpus(unsigned long flags) +void kgdb_roundup_cpus(void) { local_irq_enable(); smp_call_function(kgdb_call_nmi_hook, NULL, 0); diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c index 16c24b22d0b2..012e0e230ac2 100644 --- a/arch/hexagon/kernel/kgdb.c +++ b/arch/hexagon/kernel/kgdb.c @@ -119,17 +119,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) /** * kgdb_roundup_cpus - Get other CPUs into a holding pattern - * @flags: Current IRQ state * * On SMP systems, we need to get the attention of the other CPUs * and get them be in a known state. This should do what is needed * to get the other CPUs to call kgdb_wait(). Note that on some arches, * the NMI approach is not used for rounding up all the CPUs. For example, - * in case of MIPS, smp_call_function() is used to roundup CPUs. In - * this case, we have to make sure that interrupts are enabled before - * calling smp_call_function(). The argument to this function is - * the flags that will be used when restoring the interrupts. 
There is - * local_irq_save() call before kgdb_roundup_cpus(). + * in case of MIPS, smp_call_function() is used to roundup CPUs. * * On non-SMP systems, this is not called. */ @@ -139,7 +134,7 @@ static void hexagon_kgdb_nmi_hook(void *ignored) kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); } -void kgdb_roundup_cpus(unsigned long flags) +void kgdb_roundup_cpus(void) { local_irq_enable(); smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0); diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index eb6c0d582626..2b05effc17b4 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -219,7 +219,7 @@ static void kgdb_call_nmi_hook(void *ignored) set_fs(old_fs); } -void kgdb_roundup_cpus(unsigned long flags) +void kgdb_roundup_cpus(void) { local_irq_enable(); smp_call_function(kgdb_call_nmi_hook, NULL, 0); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 59c578f865aa..b0e804844be0 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -124,7 +124,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs) } #ifdef CONFIG_SMP -void kgdb_roundup_cpus(unsigned long flags) +v
[REPOST PATCH v6 0/4] kgdb: Fix kgdb_roundup_cpus()
This series was originally part of the series ("serial: Finish kgdb on qcom_geni; fix many lockdep splats w/ kgdb") but it made sense to split it up. It's believed that dropping into kgdb should be more robust once these patches are applied. Repost of v6 adds CC's and also tags already received. Changes in v6: - Moved smp_call_function_single_async() error check to patch 3. Changes in v5: - Add a comment about get_irq_regs(). - get_cpu() => raw_smp_processor_id() in kgdb_roundup_cpus(). - for_each_cpu() => for_each_online_cpu() - Error check smp_call_function_single_async() Changes in v4: - Removed smp_mb() calls. - Also clear out .debuggerinfo. - Also clear out .debuggerinfo and .task for the master. - Remove clearing out in kdb_stub for offline CPUs; it's now redundant. Changes in v3: - No separate init call. - Don't round up the CPU that is doing the rounding up. - Add "#ifdef CONFIG_SMP" to match the rest of the file. - Updated desc saying we don't solve the "failed to roundup" case. - Document the ignored parameter. - Don't round up a CPU that failed rounding up before new for v3. - Don't back trace on a cpu that didn't round up new for v3. Changes in v2: - Removing irq flags separated from fixing lockdep splat. - Don't use smp_call_function (Daniel). Douglas Anderson (4): kgdb: Remove irq flags from roundup kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function() kgdb: Don't round up a CPU that failed rounding up before kdb: Don't back trace on a cpu that didn't round up arch/arc/kernel/kgdb.c | 10 + arch/arm/kernel/kgdb.c | 12 -- arch/arm64/kernel/kgdb.c| 12 -- arch/hexagon/kernel/kgdb.c | 32 arch/mips/kernel/kgdb.c | 9 + arch/powerpc/kernel/kgdb.c | 6 +-- arch/sh/kernel/kgdb.c | 12 -- arch/sparc/kernel/smp_64.c | 2 +- arch/x86/kernel/kgdb.c | 9 + include/linux/kgdb.h| 22 +++ kernel/debug/debug_core.c | 65 - kernel/debug/debug_core.h | 1 + kernel/debug/kdb/kdb_bt.c | 11 +- kernel/debug/kdb/kdb_debugger.c | 7 14 files changed, 98 insertions(+), 112 deletions(-) -- 2.20.0.rc1.387.gf8505762e3-goog
[PATCH v6 2/4] kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
When I had lockdep turned on and dropped into kgdb I got a nice splat on my system. Specifically it hit: DEBUG_LOCKS_WARN_ON(current->hardirq_context) Specifically it looked like this: sysrq: SysRq : DEBUG [ cut here ] DEBUG_LOCKS_WARN_ON(current->hardirq_context) WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27 pstate: 604003c9 (nZCv DAIF +PAN -UAO) pc : lockdep_hardirqs_on+0xf0/0x160 ... Call trace: lockdep_hardirqs_on+0xf0/0x160 trace_hardirqs_on+0x188/0x1ac kgdb_roundup_cpus+0x14/0x3c kgdb_cpu_enter+0x53c/0x5cc kgdb_handle_exception+0x180/0x1d4 kgdb_compiled_brk_fn+0x30/0x3c brk_handler+0x134/0x178 do_debug_exception+0xfc/0x178 el1_dbg+0x18/0x78 kgdb_breakpoint+0x34/0x58 sysrq_handle_dbg+0x54/0x5c __handle_sysrq+0x114/0x21c handle_sysrq+0x30/0x3c qcom_geni_serial_isr+0x2dc/0x30c ... ... irq event stamp: ...45 hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4 hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130 softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34 softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100 ---[ end trace adf21f830c46e638 ]--- Looking closely at it, it seems like a really bad idea to be calling local_irq_enable() in kgdb_roundup_cpus(). If nothing else that seems like it could violate spinlock semantics and cause a deadlock. Instead, let's use a private csd alongside smp_call_function_single_async() to round up the other CPUs. Using smp_call_function_single_async() doesn't require interrupts to be enabled so we can remove the offending bit of code. In order to avoid duplicating this across all the architectures that use the default kgdb_roundup_cpus(), we'll add a "weak" implementation to debug_core.c. Looking at all the people who previously had copies of this code, there were a few variants. I've attempted to keep the variants working like they used to. Specifically: * For arch/arc we passed NULL to kgdb_nmicallback() instead of get_irq_regs(). * For arch/mips there was a bit of extra code around kgdb_nmicallback() NOTE: In this patch we will still get into trouble if we try to round up a CPU that failed to round up before. We'll try to round it up again and potentially hang when we try to grab the csd lock. That's not new behavior but we'll still try to do better in a future patch. Suggested-by: Daniel Thompson Signed-off-by: Douglas Anderson --- Changes in v6: - Moved smp_call_function_single_async() error check to patch 3. Changes in v5: - Add a comment about get_irq_regs(). - get_cpu() => raw_smp_processor_id() in kgdb_roundup_cpus(). - for_each_cpu() => for_each_online_cpu() - Error check smp_call_function_single_async() Changes in v4: None Changes in v3: - No separate init call. - Don't round up the CPU that is doing the rounding up. - Add "#ifdef CONFIG_SMP" to match the rest of the file. - Updated desc saying we don't solve the "failed to roundup" case. - Document the ignored parameter. Changes in v2: - Removing irq flags separated from fixing lockdep splat. - Don't use smp_call_function (Daniel). 
 arch/arc/kernel/kgdb.c     | 10 ++
 arch/arm/kernel/kgdb.c     | 12 ---
 arch/arm64/kernel/kgdb.c   | 12 ---
 arch/hexagon/kernel/kgdb.c | 27 -
 arch/mips/kernel/kgdb.c    |  9 +
 arch/powerpc/kernel/kgdb.c |  4 ++--
 arch/sh/kernel/kgdb.c      | 12 ---
 include/linux/kgdb.h       | 15 --
 kernel/debug/debug_core.c  | 41 ++
 9 files changed, 59 insertions(+), 83 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 0932851028e0..68d9fe4b5aa7 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -192,18 +192,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
-static void kgdb_call_nmi_hook(void *ignored)
+void kgdb_call_nmi_hook(void *ignored)
 {
+	/* Default implementation passes get_irq_regs() but we don't */
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index f21077b077be..d9a69e941463 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -170,18 +170,6 @@ static struct undef_hook kgdb_compiled_brkpt_hook = {
 	.fn = kgdb_compiled_brk_fn
 };
 
-static void kgdb_call_nmi_hook(void *ignored)
-{
-
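Roughly sketched, the new weak default amounts to the following. This
is a simplified sketch of the approach the commit message describes,
not the verbatim patch (in particular, error handling for
smp_call_function_single_async() moved between v5 and v6):

	#include <linux/kgdb.h>
	#include <linux/percpu.h>
	#include <linux/smp.h>
	#include <asm/irq_regs.h>

	static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd);

	void __weak kgdb_call_nmi_hook(void *ignored)
	{
		/* Most arches want the IPI's regs; arc overrides this
		 * and passes NULL instead. */
		kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
	}

	void __weak kgdb_roundup_cpus(void)
	{
		call_single_data_t *csd;
		int this_cpu = raw_smp_processor_id();
		int cpu;

		for_each_online_cpu(cpu) {
			/* No point in rounding up the CPU that is
			 * already sitting in the debugger. */
			if (cpu == this_cpu)
				continue;

			csd = &per_cpu(kgdb_roundup_csd, cpu);
			csd->func = kgdb_call_nmi_hook;
			smp_call_function_single_async(cpu, csd);
		}
	}

Because each target CPU gets its own private csd, the IPIs can be
fired with interrupts still disabled, which is the whole point of
moving away from smp_call_function().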
[PATCH v6 1/4] kgdb: Remove irq flags from roundup
The function kgdb_roundup_cpus() was passed a parameter that was
documented as:

> the flags that will be used when restoring the interrupts. There is
> local_irq_save() call before kgdb_roundup_cpus().

Nobody used those flags. Anyone who wanted to temporarily turn on
interrupts just did local_irq_enable() and local_irq_disable() without
looking at them. So we can definitely remove the flags.

Signed-off-by: Douglas Anderson
Acked-by: Will Deacon
---
Changes in v6: None
Changes in v5: None
Changes in v4: None
Changes in v3: None
Changes in v2:
- Removing irq flags separated from fixing lockdep splat.

 arch/arc/kernel/kgdb.c     | 2 +-
 arch/arm/kernel/kgdb.c     | 2 +-
 arch/arm64/kernel/kgdb.c   | 2 +-
 arch/hexagon/kernel/kgdb.c | 9 ++---
 arch/mips/kernel/kgdb.c    | 2 +-
 arch/powerpc/kernel/kgdb.c | 2 +-
 arch/sh/kernel/kgdb.c      | 2 +-
 arch/sparc/kernel/smp_64.c | 2 +-
 arch/x86/kernel/kgdb.c     | 9 ++---
 include/linux/kgdb.h       | 9 ++---
 kernel/debug/debug_core.c  | 2 +-
 11 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 9a3c34af2ae8..0932851028e0 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -197,7 +197,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index caa0dbe3dc61..f21077b077be 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -175,7 +175,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index a20de58061a8..12c339ff6e75 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -289,7 +289,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
index 16c24b22d0b2..012e0e230ac2 100644
--- a/arch/hexagon/kernel/kgdb.c
+++ b/arch/hexagon/kernel/kgdb.c
@@ -119,17 +119,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 
 /**
  * kgdb_roundup_cpus - Get other CPUs into a holding pattern
- * @flags: Current IRQ state
  *
  * On SMP systems, we need to get the attention of the other CPUs
  * and get them be in a known state. This should do what is needed
  * to get the other CPUs to call kgdb_wait(). Note that on some arches,
  * the NMI approach is not used for rounding up all the CPUs. For example,
- * in case of MIPS, smp_call_function() is used to roundup CPUs. In
- * this case, we have to make sure that interrupts are enabled before
- * calling smp_call_function(). The argument to this function is
- * the flags that will be used when restoring the interrupts. There is
- * local_irq_save() call before kgdb_roundup_cpus().
+ * in case of MIPS, smp_call_function() is used to roundup CPUs.
  *
  * On non-SMP systems, this is not called.
  */
@@ -139,7 +134,7 @@ static void hexagon_kgdb_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0);
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index eb6c0d582626..2b05effc17b4 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -219,7 +219,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	set_fs(old_fs);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 59c578f865aa..b0e804844be0 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -124,7 +124,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_SMP
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	smp_send_debugger_break();
 }
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 4f04c6638a4d..cc57630f6bf2 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -319,7 +319,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long
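The reason the parameter could go away is visible in the calling
pattern: the debugger entry path saves and restores the IRQ state
itself. A minimal sketch, with a made-up function name standing in for
the real caller in debug_core.c:

	#include <linux/irqflags.h>
	#include <linux/kgdb.h>

	/* Illustrative only -- not the actual debug_core.c code. The
	 * caller owns the IRQ state, so the "flags" it used to hand to
	 * kgdb_roundup_cpus() were dead weight: no implementation ever
	 * looked at them. */
	static void example_debugger_entry(void)
	{
		unsigned long flags;

		local_irq_save(flags);
		kgdb_roundup_cpus();	/* was kgdb_roundup_cpus(flags) */
		/* ... debugger main loop would run here ... */
		local_irq_restore(flags);
	}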
[PATCH v6 0/4] kgdb: Fix kgdb_roundup_cpus()
This series was originally part of the series ("serial: Finish kgdb on
qcom_geni; fix many lockdep splats w/ kgdb") but it made sense to
split it up.

It's believed that dropping into kgdb should be more robust once these
patches are applied.

Changes in v6:
- Moved smp_call_function_single_async() error check to patch 3.

Changes in v5:
- Add a comment about get_irq_regs().
- get_cpu() => raw_smp_processor_id() in kgdb_roundup_cpus().
- for_each_cpu() => for_each_online_cpu()
- Error check smp_call_function_single_async()

Changes in v4:
- Removed smp_mb() calls.
- Also clear out .debuggerinfo.
- Also clear out .debuggerinfo and .task for the master.
- Remove clearing out in kdb_stub for offline CPUs; it's now redundant.

Changes in v3:
- No separate init call.
- Don't round up the CPU that is doing the rounding up.
- Add "#ifdef CONFIG_SMP" to match the rest of the file.
- Updated desc saying we don't solve the "failed to roundup" case.
- Document the ignored parameter.
- Don't round up a CPU that failed rounding up before (new for v3).
- Don't back trace on a cpu that didn't round up (new for v3).

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).

Douglas Anderson (4):
  kgdb: Remove irq flags from roundup
  kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
  kgdb: Don't round up a CPU that failed rounding up before
  kdb: Don't back trace on a cpu that didn't round up

 arch/arc/kernel/kgdb.c          | 10 +
 arch/arm/kernel/kgdb.c          | 12 --
 arch/arm64/kernel/kgdb.c        | 12 --
 arch/hexagon/kernel/kgdb.c      | 32
 arch/mips/kernel/kgdb.c         |  9 +
 arch/powerpc/kernel/kgdb.c      |  6 +--
 arch/sh/kernel/kgdb.c           | 12 --
 arch/sparc/kernel/smp_64.c      |  2 +-
 arch/x86/kernel/kgdb.c          |  9 +
 include/linux/kgdb.h            | 22 +++
 kernel/debug/debug_core.c       | 65 -
 kernel/debug/debug_core.h       |  1 +
 kernel/debug/kdb/kdb_bt.c       | 11 +-
 kernel/debug/kdb/kdb_debugger.c |  7
 14 files changed, 98 insertions(+), 112 deletions(-)

--
2.20.0.rc0.387.gc7a69e6b6c-goog
[PATCH v5 2/4] kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
When I had lockdep turned on and dropped into kgdb I got a nice splat
on my system. Specifically it hit:
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)

Specifically it looked like this:

  sysrq: SysRq : DEBUG
  ------------[ cut here ]------------
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)
  WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160
  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27
  pstate: 604003c9 (nZCv DAIF +PAN -UAO)
  pc : lockdep_hardirqs_on+0xf0/0x160
  ...
  Call trace:
   lockdep_hardirqs_on+0xf0/0x160
   trace_hardirqs_on+0x188/0x1ac
   kgdb_roundup_cpus+0x14/0x3c
   kgdb_cpu_enter+0x53c/0x5cc
   kgdb_handle_exception+0x180/0x1d4
   kgdb_compiled_brk_fn+0x30/0x3c
   brk_handler+0x134/0x178
   do_debug_exception+0xfc/0x178
   el1_dbg+0x18/0x78
   kgdb_breakpoint+0x34/0x58
   sysrq_handle_dbg+0x54/0x5c
   __handle_sysrq+0x114/0x21c
   handle_sysrq+0x30/0x3c
   qcom_geni_serial_isr+0x2dc/0x30c
  ...
  ...
  irq event stamp: ...45
  hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4
  hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130
  softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34
  softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100
  ---[ end trace adf21f830c46e638 ]---

Looking closely at it, it seems like a really bad idea to be calling
local_irq_enable() in kgdb_roundup_cpus(). If nothing else that seems
like it could violate spinlock semantics and cause a deadlock.

Instead, let's use a private csd alongside
smp_call_function_single_async() to round up the other CPUs. Using
smp_call_function_single_async() doesn't require interrupts to be
enabled so we can remove the offending bit of code.

In order to avoid duplicating this across all the architectures that
use the default kgdb_roundup_cpus(), we'll add a "weak" implementation
to debug_core.c.

Looking at all the people who previously had copies of this code,
there were a few variants. I've attempted to keep the variants working
like they used to. Specifically:
* For arch/arc we passed NULL to kgdb_nmicallback() instead of
  get_irq_regs().
* For arch/mips there was a bit of extra code around
  kgdb_nmicallback().

NOTE: In this patch we will still get into trouble if we try to round
up a CPU that failed to round up before. We'll try to round it up
again and potentially hang when we try to grab the csd lock. That's
not new behavior but we'll still try to do better in a future patch.

Suggested-by: Daniel Thompson
Signed-off-by: Douglas Anderson
---

Changes in v5:
- Add a comment about get_irq_regs().
- get_cpu() => raw_smp_processor_id() in kgdb_roundup_cpus().
- for_each_cpu() => for_each_online_cpu()
- Error check smp_call_function_single_async()

Changes in v4: None
Changes in v3:
- No separate init call.
- Don't round up the CPU that is doing the rounding up.
- Add "#ifdef CONFIG_SMP" to match the rest of the file.
- Updated desc saying we don't solve the "failed to roundup" case.
- Document the ignored parameter.

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).
 arch/arc/kernel/kgdb.c     | 10 ++---
 arch/arm/kernel/kgdb.c     | 12 ---
 arch/arm64/kernel/kgdb.c   | 12 ---
 arch/hexagon/kernel/kgdb.c | 27 ---
 arch/mips/kernel/kgdb.c    |  9 +---
 arch/powerpc/kernel/kgdb.c |  4 ++--
 arch/sh/kernel/kgdb.c      | 12 ---
 include/linux/kgdb.h       | 15 +++--
 kernel/debug/debug_core.c  | 44 ++
 9 files changed, 62 insertions(+), 83 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 0932851028e0..68d9fe4b5aa7 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -192,18 +192,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
-static void kgdb_call_nmi_hook(void *ignored)
+void kgdb_call_nmi_hook(void *ignored)
 {
+	/* Default implementation passes get_irq_regs() but we don't */
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index f21077b077be..d9a69e941463 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -170,18 +170,6 @@ static struct undef_hook kgdb_compiled_brkpt_hook = {
 	.fn = kgdb_compiled_brk_fn
 };
 
-static void kgdb_call_nmi_hook(void *ignored)
-{
-	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
-}
-
-void kgdb_roundu
[PATCH v5 1/4] kgdb: Remove irq flags from roundup
The function kgdb_roundup_cpus() was passed a parameter that was
documented as:

> the flags that will be used when restoring the interrupts. There is
> local_irq_save() call before kgdb_roundup_cpus().

Nobody used those flags. Anyone who wanted to temporarily turn on
interrupts just did local_irq_enable() and local_irq_disable() without
looking at them. So we can definitely remove the flags.

Signed-off-by: Douglas Anderson
Acked-by: Will Deacon
---
Changes in v5: None
Changes in v4: None
Changes in v3: None
Changes in v2:
- Removing irq flags separated from fixing lockdep splat.

 arch/arc/kernel/kgdb.c     | 2 +-
 arch/arm/kernel/kgdb.c     | 2 +-
 arch/arm64/kernel/kgdb.c   | 2 +-
 arch/hexagon/kernel/kgdb.c | 9 ++---
 arch/mips/kernel/kgdb.c    | 2 +-
 arch/powerpc/kernel/kgdb.c | 2 +-
 arch/sh/kernel/kgdb.c      | 2 +-
 arch/sparc/kernel/smp_64.c | 2 +-
 arch/x86/kernel/kgdb.c     | 9 ++---
 include/linux/kgdb.h       | 9 ++---
 kernel/debug/debug_core.c  | 2 +-
 11 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 9a3c34af2ae8..0932851028e0 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -197,7 +197,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index caa0dbe3dc61..f21077b077be 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -175,7 +175,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index a20de58061a8..12c339ff6e75 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -289,7 +289,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
index 16c24b22d0b2..012e0e230ac2 100644
--- a/arch/hexagon/kernel/kgdb.c
+++ b/arch/hexagon/kernel/kgdb.c
@@ -119,17 +119,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 
 /**
  * kgdb_roundup_cpus - Get other CPUs into a holding pattern
- * @flags: Current IRQ state
  *
  * On SMP systems, we need to get the attention of the other CPUs
  * and get them be in a known state. This should do what is needed
  * to get the other CPUs to call kgdb_wait(). Note that on some arches,
  * the NMI approach is not used for rounding up all the CPUs. For example,
- * in case of MIPS, smp_call_function() is used to roundup CPUs. In
- * this case, we have to make sure that interrupts are enabled before
- * calling smp_call_function(). The argument to this function is
- * the flags that will be used when restoring the interrupts. There is
- * local_irq_save() call before kgdb_roundup_cpus().
+ * in case of MIPS, smp_call_function() is used to roundup CPUs.
 *
 * On non-SMP systems, this is not called.
  */
@@ -139,7 +134,7 @@ static void hexagon_kgdb_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0);
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index eb6c0d582626..2b05effc17b4 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -219,7 +219,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	set_fs(old_fs);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 59c578f865aa..b0e804844be0 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -124,7 +124,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_SMP
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	smp_send_debugger_break();
 }
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 4f04c6638a4d..cc57630f6bf2 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -319,7 +319,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundu
[PATCH v5 0/4] kgdb: Fix kgdb_roundup_cpus()
This series was originally part of the series ("serial: Finish kgdb on
qcom_geni; fix many lockdep splats w/ kgdb") but it made sense to
split it up.

It's believed that dropping into kgdb should be more robust once these
patches are applied.

Changes in v5:
- Add a comment about get_irq_regs().
- get_cpu() => raw_smp_processor_id() in kgdb_roundup_cpus().
- for_each_cpu() => for_each_online_cpu()
- Error check smp_call_function_single_async()

Changes in v4:
- Removed smp_mb() calls.
- Also clear out .debuggerinfo.
- Also clear out .debuggerinfo and .task for the master.
- Remove clearing out in kdb_stub for offline CPUs; it's now redundant.

Changes in v3:
- No separate init call.
- Don't round up the CPU that is doing the rounding up.
- Add "#ifdef CONFIG_SMP" to match the rest of the file.
- Updated desc saying we don't solve the "failed to roundup" case.
- Document the ignored parameter.
- Don't round up a CPU that failed rounding up before (new for v3).
- Don't back trace on a cpu that didn't round up (new for v3).

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).

Douglas Anderson (4):
  kgdb: Remove irq flags from roundup
  kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
  kgdb: Don't round up a CPU that failed rounding up before
  kdb: Don't back trace on a cpu that didn't round up

 arch/arc/kernel/kgdb.c          | 10 +
 arch/arm/kernel/kgdb.c          | 12 --
 arch/arm64/kernel/kgdb.c        | 12 --
 arch/hexagon/kernel/kgdb.c      | 32
 arch/mips/kernel/kgdb.c         |  9 +
 arch/powerpc/kernel/kgdb.c      |  6 +--
 arch/sh/kernel/kgdb.c           | 12 --
 arch/sparc/kernel/smp_64.c      |  2 +-
 arch/x86/kernel/kgdb.c          |  9 +
 include/linux/kgdb.h            | 22 +++
 kernel/debug/debug_core.c       | 65 -
 kernel/debug/debug_core.h       |  1 +
 kernel/debug/kdb/kdb_bt.c       | 11 +-
 kernel/debug/kdb/kdb_debugger.c |  7
 14 files changed, 98 insertions(+), 112 deletions(-)

--
2.20.0.rc0.387.gc7a69e6b6c-goog
[PATCH v4 2/4] kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
When I had lockdep turned on and dropped into kgdb I got a nice splat
on my system. Specifically it hit:
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)

Specifically it looked like this:

  sysrq: SysRq : DEBUG
  ------------[ cut here ]------------
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)
  WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160
  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27
  pstate: 604003c9 (nZCv DAIF +PAN -UAO)
  pc : lockdep_hardirqs_on+0xf0/0x160
  ...
  Call trace:
   lockdep_hardirqs_on+0xf0/0x160
   trace_hardirqs_on+0x188/0x1ac
   kgdb_roundup_cpus+0x14/0x3c
   kgdb_cpu_enter+0x53c/0x5cc
   kgdb_handle_exception+0x180/0x1d4
   kgdb_compiled_brk_fn+0x30/0x3c
   brk_handler+0x134/0x178
   do_debug_exception+0xfc/0x178
   el1_dbg+0x18/0x78
   kgdb_breakpoint+0x34/0x58
   sysrq_handle_dbg+0x54/0x5c
   __handle_sysrq+0x114/0x21c
   handle_sysrq+0x30/0x3c
   qcom_geni_serial_isr+0x2dc/0x30c
  ...
  ...
  irq event stamp: ...45
  hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4
  hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130
  softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34
  softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100
  ---[ end trace adf21f830c46e638 ]---

Looking closely at it, it seems like a really bad idea to be calling
local_irq_enable() in kgdb_roundup_cpus(). If nothing else that seems
like it could violate spinlock semantics and cause a deadlock.

Instead, let's use a private csd alongside
smp_call_function_single_async() to round up the other CPUs. Using
smp_call_function_single_async() doesn't require interrupts to be
enabled so we can remove the offending bit of code.

In order to avoid duplicating this across all the architectures that
use the default kgdb_roundup_cpus(), we'll add a "weak" implementation
to debug_core.c.

Looking at all the people who previously had copies of this code,
there were a few variants. I've attempted to keep the variants working
like they used to. Specifically:
* For arch/arc we passed NULL to kgdb_nmicallback() instead of
  get_irq_regs().
* For arch/mips there was a bit of extra code around
  kgdb_nmicallback().

NOTE: In this patch we will still get into trouble if we try to round
up a CPU that failed to round up before. We'll try to round it up
again and potentially hang when we try to grab the csd lock. That's
not new behavior but we'll still try to do better in a future patch.

Suggested-by: Daniel Thompson
Signed-off-by: Douglas Anderson
---

Changes in v4: None
Changes in v3:
- No separate init call.
- Don't round up the CPU that is doing the rounding up.
- Add "#ifdef CONFIG_SMP" to match the rest of the file.
- Updated desc saying we don't solve the "failed to roundup" case.
- Document the ignored parameter.

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).
 arch/arc/kernel/kgdb.c     | 10 ++
 arch/arm/kernel/kgdb.c     | 12
 arch/arm64/kernel/kgdb.c   | 12
 arch/hexagon/kernel/kgdb.c | 27 ---
 arch/mips/kernel/kgdb.c    |  9 +
 arch/powerpc/kernel/kgdb.c |  4 ++--
 arch/sh/kernel/kgdb.c      | 12
 include/linux/kgdb.h       | 15 +--
 kernel/debug/debug_core.c  | 35 +++
 9 files changed, 53 insertions(+), 83 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 0932851028e0..68d9fe4b5aa7 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -192,18 +192,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
-static void kgdb_call_nmi_hook(void *ignored)
+void kgdb_call_nmi_hook(void *ignored)
 {
+	/* Default implementation passes get_irq_regs() but we don't */
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index f21077b077be..d9a69e941463 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -170,18 +170,6 @@ static struct undef_hook kgdb_compiled_brkpt_hook = {
 	.fn = kgdb_compiled_brk_fn
 };
 
-static void kgdb_call_nmi_hook(void *ignored)
-{
-	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
-}
-
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 {
 	struct pt_regs
[PATCH v4 1/4] kgdb: Remove irq flags from roundup
The function kgdb_roundup_cpus() was passed a parameter that was
documented as:

> the flags that will be used when restoring the interrupts. There is
> local_irq_save() call before kgdb_roundup_cpus().

Nobody used those flags. Anyone who wanted to temporarily turn on
interrupts just did local_irq_enable() and local_irq_disable() without
looking at them. So we can definitely remove the flags.

Signed-off-by: Douglas Anderson
---
Changes in v4: None
Changes in v3: None
Changes in v2:
- Removing irq flags separated from fixing lockdep splat.

 arch/arc/kernel/kgdb.c     | 2 +-
 arch/arm/kernel/kgdb.c     | 2 +-
 arch/arm64/kernel/kgdb.c   | 2 +-
 arch/hexagon/kernel/kgdb.c | 9 ++---
 arch/mips/kernel/kgdb.c    | 2 +-
 arch/powerpc/kernel/kgdb.c | 2 +-
 arch/sh/kernel/kgdb.c      | 2 +-
 arch/sparc/kernel/smp_64.c | 2 +-
 arch/x86/kernel/kgdb.c     | 9 ++---
 include/linux/kgdb.h       | 9 ++---
 kernel/debug/debug_core.c  | 2 +-
 11 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 9a3c34af2ae8..0932851028e0 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -197,7 +197,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index caa0dbe3dc61..f21077b077be 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -175,7 +175,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index a20de58061a8..12c339ff6e75 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -289,7 +289,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
index 16c24b22d0b2..012e0e230ac2 100644
--- a/arch/hexagon/kernel/kgdb.c
+++ b/arch/hexagon/kernel/kgdb.c
@@ -119,17 +119,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 
 /**
  * kgdb_roundup_cpus - Get other CPUs into a holding pattern
- * @flags: Current IRQ state
  *
  * On SMP systems, we need to get the attention of the other CPUs
  * and get them be in a known state. This should do what is needed
  * to get the other CPUs to call kgdb_wait(). Note that on some arches,
  * the NMI approach is not used for rounding up all the CPUs. For example,
- * in case of MIPS, smp_call_function() is used to roundup CPUs. In
- * this case, we have to make sure that interrupts are enabled before
- * calling smp_call_function(). The argument to this function is
- * the flags that will be used when restoring the interrupts. There is
- * local_irq_save() call before kgdb_roundup_cpus().
+ * in case of MIPS, smp_call_function() is used to roundup CPUs.
 *
 * On non-SMP systems, this is not called.
  */
@@ -139,7 +134,7 @@ static void hexagon_kgdb_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0);
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index eb6c0d582626..2b05effc17b4 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -219,7 +219,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	set_fs(old_fs);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 59c578f865aa..b0e804844be0 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -124,7 +124,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_SMP
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	smp_send_debugger_break();
 }
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 4f04c6638a4d..cc57630f6bf2 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -319,7 +319,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_
[PATCH v4 0/4] kgdb: Fix kgdb_roundup_cpus()
This series was originally part of the series ("serial: Finish kgdb on
qcom_geni; fix many lockdep splats w/ kgdb") but it made sense to
split it up.

It's believed that dropping into kgdb should be more robust once these
patches are applied.

Changes in v4:
- Removed smp_mb() calls.
- Also clear out .debuggerinfo.
- Also clear out .debuggerinfo and .task for the master.
- Remove clearing out in kdb_stub for offline CPUs; it's now redundant.

Changes in v3:
- No separate init call.
- Don't round up the CPU that is doing the rounding up.
- Add "#ifdef CONFIG_SMP" to match the rest of the file.
- Updated desc saying we don't solve the "failed to roundup" case.
- Document the ignored parameter.
- Don't round up a CPU that failed rounding up before (new for v3).
- Don't back trace on a cpu that didn't round up (new for v3).

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).

Douglas Anderson (4):
  kgdb: Remove irq flags from roundup
  kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
  kgdb: Don't round up a CPU that failed rounding up before
  kdb: Don't back trace on a cpu that didn't round up

 arch/arc/kernel/kgdb.c          | 10 ++
 arch/arm/kernel/kgdb.c          | 12 ---
 arch/arm64/kernel/kgdb.c        | 12 ---
 arch/hexagon/kernel/kgdb.c      | 32 ---
 arch/mips/kernel/kgdb.c         |  9 +-
 arch/powerpc/kernel/kgdb.c      |  6 ++--
 arch/sh/kernel/kgdb.c           | 12 ---
 arch/sparc/kernel/smp_64.c      |  2 +-
 arch/x86/kernel/kgdb.c          |  9 ++
 include/linux/kgdb.h            | 22 -
 kernel/debug/debug_core.c       | 56 -
 kernel/debug/debug_core.h       |  1 +
 kernel/debug/kdb/kdb_bt.c       | 11 ++-
 kernel/debug/kdb/kdb_debugger.c |  7 -
 14 files changed, 89 insertions(+), 112 deletions(-)

--
2.19.1.930.g4563a0d9d0-goog
[PATCH v3 2/4] kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
When I had lockdep turned on and dropped into kgdb I got a nice splat
on my system. Specifically it hit:
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)

Specifically it looked like this:

  sysrq: SysRq : DEBUG
  ------------[ cut here ]------------
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)
  WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160
  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27
  pstate: 604003c9 (nZCv DAIF +PAN -UAO)
  pc : lockdep_hardirqs_on+0xf0/0x160
  ...
  Call trace:
   lockdep_hardirqs_on+0xf0/0x160
   trace_hardirqs_on+0x188/0x1ac
   kgdb_roundup_cpus+0x14/0x3c
   kgdb_cpu_enter+0x53c/0x5cc
   kgdb_handle_exception+0x180/0x1d4
   kgdb_compiled_brk_fn+0x30/0x3c
   brk_handler+0x134/0x178
   do_debug_exception+0xfc/0x178
   el1_dbg+0x18/0x78
   kgdb_breakpoint+0x34/0x58
   sysrq_handle_dbg+0x54/0x5c
   __handle_sysrq+0x114/0x21c
   handle_sysrq+0x30/0x3c
   qcom_geni_serial_isr+0x2dc/0x30c
  ...
  ...
  irq event stamp: ...45
  hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4
  hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130
  softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34
  softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100
  ---[ end trace adf21f830c46e638 ]---

Looking closely at it, it seems like a really bad idea to be calling
local_irq_enable() in kgdb_roundup_cpus(). If nothing else that seems
like it could violate spinlock semantics and cause a deadlock.

Instead, let's use a private csd alongside
smp_call_function_single_async() to round up the other CPUs. Using
smp_call_function_single_async() doesn't require interrupts to be
enabled so we can remove the offending bit of code.

In order to avoid duplicating this across all the architectures that
use the default kgdb_roundup_cpus(), we'll add a "weak" implementation
to debug_core.c.

Looking at all the people who previously had copies of this code,
there were a few variants. I've attempted to keep the variants working
like they used to. Specifically:
* For arch/arc we passed NULL to kgdb_nmicallback() instead of
  get_irq_regs().
* For arch/mips there was a bit of extra code around
  kgdb_nmicallback().

NOTE: In this patch we will still get into trouble if we try to round
up a CPU that failed to round up before. We'll try to round it up
again and potentially hang when we try to grab the csd lock. That's
not new behavior but we'll still try to do better in a future patch.

Suggested-by: Daniel Thompson
Signed-off-by: Douglas Anderson
---

Changes in v3:
- No separate init call.
- Don't round up the CPU that is doing the rounding up.
- Add "#ifdef CONFIG_SMP" to match the rest of the file.
- Updated desc saying we don't solve the "failed to roundup" case.
- Document the ignored parameter.

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).
 arch/arc/kernel/kgdb.c     | 10 ++
 arch/arm/kernel/kgdb.c     | 12
 arch/arm64/kernel/kgdb.c   | 12
 arch/hexagon/kernel/kgdb.c | 27 ---
 arch/mips/kernel/kgdb.c    |  9 +
 arch/powerpc/kernel/kgdb.c |  4 ++--
 arch/sh/kernel/kgdb.c      | 12
 include/linux/kgdb.h       | 15 +--
 kernel/debug/debug_core.c  | 35 +++
 9 files changed, 53 insertions(+), 83 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 0932851028e0..68d9fe4b5aa7 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -192,18 +192,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
-static void kgdb_call_nmi_hook(void *ignored)
+void kgdb_call_nmi_hook(void *ignored)
 {
+	/* Default implementation passes get_irq_regs() but we don't */
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index f21077b077be..d9a69e941463 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -170,18 +170,6 @@ static struct undef_hook kgdb_compiled_brkpt_hook = {
 	.fn = kgdb_compiled_brk_fn
 };
 
-static void kgdb_call_nmi_hook(void *ignored)
-{
-	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
-}
-
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 {
 	struct pt_regs *regs = args
[PATCH v3 1/4] kgdb: Remove irq flags from roundup
The function kgdb_roundup_cpus() was passed a parameter that was
documented as:

> the flags that will be used when restoring the interrupts. There is
> local_irq_save() call before kgdb_roundup_cpus().

Nobody used those flags. Anyone who wanted to temporarily turn on
interrupts just did local_irq_enable() and local_irq_disable() without
looking at them. So we can definitely remove the flags.

Signed-off-by: Douglas Anderson
---
Changes in v3: None
Changes in v2:
- Removing irq flags separated from fixing lockdep splat.

 arch/arc/kernel/kgdb.c     | 2 +-
 arch/arm/kernel/kgdb.c     | 2 +-
 arch/arm64/kernel/kgdb.c   | 2 +-
 arch/hexagon/kernel/kgdb.c | 9 ++---
 arch/mips/kernel/kgdb.c    | 2 +-
 arch/powerpc/kernel/kgdb.c | 2 +-
 arch/sh/kernel/kgdb.c      | 2 +-
 arch/sparc/kernel/smp_64.c | 2 +-
 arch/x86/kernel/kgdb.c     | 9 ++---
 include/linux/kgdb.h       | 9 ++---
 kernel/debug/debug_core.c  | 2 +-
 11 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 9a3c34af2ae8..0932851028e0 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -197,7 +197,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index caa0dbe3dc61..f21077b077be 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -175,7 +175,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index a20de58061a8..12c339ff6e75 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -289,7 +289,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
index 16c24b22d0b2..012e0e230ac2 100644
--- a/arch/hexagon/kernel/kgdb.c
+++ b/arch/hexagon/kernel/kgdb.c
@@ -119,17 +119,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 
 /**
  * kgdb_roundup_cpus - Get other CPUs into a holding pattern
- * @flags: Current IRQ state
  *
  * On SMP systems, we need to get the attention of the other CPUs
  * and get them be in a known state. This should do what is needed
  * to get the other CPUs to call kgdb_wait(). Note that on some arches,
  * the NMI approach is not used for rounding up all the CPUs. For example,
- * in case of MIPS, smp_call_function() is used to roundup CPUs. In
- * this case, we have to make sure that interrupts are enabled before
- * calling smp_call_function(). The argument to this function is
- * the flags that will be used when restoring the interrupts. There is
- * local_irq_save() call before kgdb_roundup_cpus().
+ * in case of MIPS, smp_call_function() is used to roundup CPUs.
 *
 * On non-SMP systems, this is not called.
  */
@@ -139,7 +134,7 @@ static void hexagon_kgdb_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0);
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index eb6c0d582626..2b05effc17b4 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -219,7 +219,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	set_fs(old_fs);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 59c578f865aa..b0e804844be0 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -124,7 +124,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_SMP
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	smp_send_debugger_break();
 }
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 4f04c6638a4d..cc57630f6bf2 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -319,7 +319,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_fu
[PATCH v3 0/4] kgdb: Fix kgdb_roundup_cpus()
This series was originally part of the series ("serial: Finish kgdb on
qcom_geni; fix many lockdep splats w/ kgdb") but it made sense to
split it up.

It's believed that dropping into kgdb should be more robust once these
patches are applied.

Changes in v3:
- No separate init call.
- Don't round up the CPU that is doing the rounding up.
- Add "#ifdef CONFIG_SMP" to match the rest of the file.
- Updated desc saying we don't solve the "failed to roundup" case.
- Document the ignored parameter.
- Patch #3 and #4 are new.

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).

Douglas Anderson (4):
  kgdb: Remove irq flags from roundup
  kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
  kgdb: Don't round up a CPU that failed rounding up before
  kdb: Don't back trace on a cpu that didn't round up

 arch/arc/kernel/kgdb.c     | 10 ++-
 arch/arm/kernel/kgdb.c     | 12 -
 arch/arm64/kernel/kgdb.c   | 12 -
 arch/hexagon/kernel/kgdb.c | 32 --
 arch/mips/kernel/kgdb.c    |  9 +--
 arch/powerpc/kernel/kgdb.c |  6 ++---
 arch/sh/kernel/kgdb.c      | 12 -
 arch/sparc/kernel/smp_64.c |  2 +-
 arch/x86/kernel/kgdb.c     |  9 ++-
 include/linux/kgdb.h       | 22 +--
 kernel/debug/debug_core.c  | 55 +-
 kernel/debug/debug_core.h  |  1 +
 kernel/debug/kdb/kdb_bt.c  | 11 +++-
 13 files changed, 88 insertions(+), 105 deletions(-)

--
2.19.1.930.g4563a0d9d0-goog
[PATCH v2 2/2] kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()
When I had lockdep turned on and dropped into kgdb I got a nice splat
on my system. Specifically it hit:
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)

Specifically it looked like this:

  sysrq: SysRq : DEBUG
  ------------[ cut here ]------------
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)
  WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160
  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27
  pstate: 604003c9 (nZCv DAIF +PAN -UAO)
  pc : lockdep_hardirqs_on+0xf0/0x160
  ...
  Call trace:
   lockdep_hardirqs_on+0xf0/0x160
   trace_hardirqs_on+0x188/0x1ac
   kgdb_roundup_cpus+0x14/0x3c
   kgdb_cpu_enter+0x53c/0x5cc
   kgdb_handle_exception+0x180/0x1d4
   kgdb_compiled_brk_fn+0x30/0x3c
   brk_handler+0x134/0x178
   do_debug_exception+0xfc/0x178
   el1_dbg+0x18/0x78
   kgdb_breakpoint+0x34/0x58
   sysrq_handle_dbg+0x54/0x5c
   __handle_sysrq+0x114/0x21c
   handle_sysrq+0x30/0x3c
   qcom_geni_serial_isr+0x2dc/0x30c
  ...
  ...
  irq event stamp: ...45
  hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4
  hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130
  softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34
  softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100
  ---[ end trace adf21f830c46e638 ]---

Looking closely at it, it seems like a really bad idea to be calling
local_irq_enable() in kgdb_roundup_cpus(). If nothing else that seems
like it could violate spinlock semantics and cause a deadlock.

Instead, let's use a private csd alongside
smp_call_function_single_async() to round up the other CPUs. Using
smp_call_function_single_async() doesn't require interrupts to be
enabled so we can remove the offending bit of code.

In order to avoid duplicating this across all the architectures that
use the default kgdb_roundup_cpus(), we'll add a "weak" implementation
to debug_core.c.

Looking at all the people who previously had copies of this code,
there were a few variants. I've attempted to keep the variants working
like they used to. Specifically:
* For arch/arc we passed NULL to kgdb_nmicallback() instead of
  get_irq_regs().
* For arch/mips there was a bit of extra code around
  kgdb_nmicallback().

Suggested-by: Daniel Thompson
Signed-off-by: Douglas Anderson
---

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).
 arch/arc/kernel/kgdb.c     | 10 ++
 arch/arm/kernel/kgdb.c     | 12
 arch/arm64/kernel/kgdb.c   | 12
 arch/hexagon/kernel/kgdb.c | 27 ---
 arch/mips/kernel/kgdb.c    |  9 +
 arch/powerpc/kernel/kgdb.c |  4 ++--
 arch/sh/kernel/kgdb.c      | 12
 include/linux/kgdb.h       | 15 +--
 kernel/debug/debug_core.c  | 36
 9 files changed, 54 insertions(+), 83 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 0932851028e0..68d9fe4b5aa7 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -192,18 +192,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
-static void kgdb_call_nmi_hook(void *ignored)
+void kgdb_call_nmi_hook(void *ignored)
 {
+	/* Default implementation passes get_irq_regs() but we don't */
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index f21077b077be..d9a69e941463 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -170,18 +170,6 @@ static struct undef_hook kgdb_compiled_brkpt_hook = {
 	.fn = kgdb_compiled_brk_fn
 };
 
-static void kgdb_call_nmi_hook(void *ignored)
-{
-	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
-}
-
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 {
 	struct pt_regs *regs = args->regs;
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 12c339ff6e75..da880247c734 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -284,18 +284,6 @@ static struct step_hook kgdb_step_hook = {
 	.fn = kgdb_step_brk_fn
 };
 
-static void kgdb_call_nmi_hook(void *ignored)
-{
-	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
-}
-
-void kgdb_roundup_cpus(void)
-{
-	local_irq_enable();
-	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
-}
-
 static int __kgd
[PATCH v2 1/2] kgdb: Remove irq flags from roundup
The function kgdb_roundup_cpus() was passed a parameter that was
documented as:

> the flags that will be used when restoring the interrupts. There is
> local_irq_save() call before kgdb_roundup_cpus().

Nobody used those flags. Anyone who wanted to temporarily turn on
interrupts just did local_irq_enable() and local_irq_disable() without
looking at them. So we can definitely remove the flags.

Signed-off-by: Douglas Anderson
---
Changes in v2:
- Removing irq flags separated from fixing lockdep splat.

 arch/arc/kernel/kgdb.c     | 2 +-
 arch/arm/kernel/kgdb.c     | 2 +-
 arch/arm64/kernel/kgdb.c   | 2 +-
 arch/hexagon/kernel/kgdb.c | 9 ++---
 arch/mips/kernel/kgdb.c    | 2 +-
 arch/powerpc/kernel/kgdb.c | 2 +-
 arch/sh/kernel/kgdb.c      | 2 +-
 arch/sparc/kernel/smp_64.c | 2 +-
 arch/x86/kernel/kgdb.c     | 9 ++---
 include/linux/kgdb.h       | 9 ++---
 kernel/debug/debug_core.c  | 2 +-
 11 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 9a3c34af2ae8..0932851028e0 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -197,7 +197,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index caa0dbe3dc61..f21077b077be 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -175,7 +175,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index a20de58061a8..12c339ff6e75 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -289,7 +289,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
index 16c24b22d0b2..012e0e230ac2 100644
--- a/arch/hexagon/kernel/kgdb.c
+++ b/arch/hexagon/kernel/kgdb.c
@@ -119,17 +119,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 
 /**
  * kgdb_roundup_cpus - Get other CPUs into a holding pattern
- * @flags: Current IRQ state
  *
  * On SMP systems, we need to get the attention of the other CPUs
  * and get them be in a known state. This should do what is needed
  * to get the other CPUs to call kgdb_wait(). Note that on some arches,
  * the NMI approach is not used for rounding up all the CPUs. For example,
- * in case of MIPS, smp_call_function() is used to roundup CPUs. In
- * this case, we have to make sure that interrupts are enabled before
- * calling smp_call_function(). The argument to this function is
- * the flags that will be used when restoring the interrupts. There is
- * local_irq_save() call before kgdb_roundup_cpus().
+ * in case of MIPS, smp_call_function() is used to roundup CPUs.
 *
 * On non-SMP systems, this is not called.
  */
@@ -139,7 +134,7 @@ static void hexagon_kgdb_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0);
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index eb6c0d582626..2b05effc17b4 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -219,7 +219,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	set_fs(old_fs);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 59c578f865aa..b0e804844be0 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -124,7 +124,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_SMP
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	smp_send_debugger_break();
 }
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 4f04c6638a4d..cc57630f6bf2 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -319,7 +319,7 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
 	local_irq_enable();
 	smp_call_function(kgdb_call_nm
[PATCH v2 0/2] kgdb: Fix kgdb_roundup_cpus()
This series was originally part of the series ("serial: Finish kgdb on
qcom_geni; fix many lockdep splats w/ kgdb") but it made sense to
split it up.

It's believed that dropping into kgdb should be more robust once these
patches are applied.

Changes in v2:
- Removing irq flags separated from fixing lockdep splat.
- Don't use smp_call_function (Daniel).

Douglas Anderson (2):
  kgdb: Remove irq flags from roundup
  kgdb: Fix kgdb_roundup_cpus() for arches who used smp_call_function()

 arch/arc/kernel/kgdb.c     | 10 ++
 arch/arm/kernel/kgdb.c     | 12
 arch/arm64/kernel/kgdb.c   | 12
 arch/hexagon/kernel/kgdb.c | 32
 arch/mips/kernel/kgdb.c    |  9 +
 arch/powerpc/kernel/kgdb.c |  6 +++---
 arch/sh/kernel/kgdb.c      | 12
 arch/sparc/kernel/smp_64.c |  2 +-
 arch/x86/kernel/kgdb.c     |  9 ++---
 include/linux/kgdb.h       | 22 ++
 kernel/debug/debug_core.c  | 38 +-
 11 files changed, 60 insertions(+), 104 deletions(-)

--
2.19.1.568.g152ad8e336-goog
[PATCH 0/7] serial: Finish kgdb on qcom_geni; fix many lockdep splats w/ kgdb
I started out this series trying to make sysrq work over the serial
console on qcom_geni_serial, then fell into a rat's nest.

To solve the deadlock I faced when enabling sysrq I tried to borrow
code from '8250_port.c' which avoided grabbing the port lock in
console_write(). ...but since these days I try to run with lockdep on
all the time, I found it caused an annoying lockdep splat (which I
also reproduced on my rk3399 board). ...so I instead changed my
qcom_geni_serial solution to borrow code from 'msm_serial.c'.

I wasn't super happy with the solution in 'msm_serial.c' though. I
don't like releasing the spinlock there. Not only is it ugly but it
means we are unlocking / re-locking _all the time_ even though sysrq
characters are rare. ...so I came up with what I think is a better
solution and then implemented it for qcom_geni_serial.

Since I had a good way to test 8250-based UARTs, I also fixed that
driver to use my new method. When doing so, I ran into a missing
include (hence the console.h patch). I have no way to test
msm_serial.c at all, so I didn't switch that (or all other serial
drivers for that matter) to the new method.

After fixing all the above issues, I found the next lockdep splat in
kgdb and I think I've worked around it in a good-enough way, but I'm
much less confident about this. Hopefully folks can take a look at it.

In general, patches earlier in this series should be "less
controversial" and hopefully can land even if people don't like
patches later in the series. ;-)

Looking back, this is pretty much two series squashed that could be
treated independently. The first is a serial series and the second is
a kgdb series. For all serial patches I'd expect them to go through
the tty tree once they've been reviewed. If folks are OK w/ the 'smp'
patch it probably should go in some core kernel tree. The kgdb patch
won't work without it, though, so to land that we'd need coordination
between the folks landing that and the folks landing the 'smp' patch.

Douglas Anderson (7):
  serial: qcom_geni_serial: Finish supporting sysrq
  serial: core: Allow processing sysrq at port unlock time
  serial: qcom_geni_serial: Process sysrq at port unlock time
  serial: core: Include console.h from serial_core.h
  serial: 8250: Process sysrq at port unlock time
  smp: Don't yell about IRQs disabled in kgdb_roundup_cpus()
  kgdb: Remove irq flags and local_irq_enable/disable from roundup

 arch/arc/kernel/kgdb.c                      |  4 +--
 arch/arm/kernel/kgdb.c                      |  4 +--
 arch/arm64/kernel/kgdb.c                    |  4 +--
 arch/hexagon/kernel/kgdb.c                  | 11 ++
 arch/mips/kernel/kgdb.c                     |  4 +--
 arch/powerpc/kernel/kgdb.c                  |  2 +-
 arch/sh/kernel/kgdb.c                       |  4 +--
 arch/sparc/kernel/smp_64.c                  |  2 +-
 arch/x86/kernel/kgdb.c                      |  9 ++---
 drivers/tty/serial/8250/8250_aspeed_vuart.c |  6 +++-
 drivers/tty/serial/8250/8250_fsl.c          |  6 +++-
 drivers/tty/serial/8250/8250_omap.c         |  6 +++-
 drivers/tty/serial/8250/8250_port.c         |  8 ++---
 drivers/tty/serial/qcom_geni_serial.c       | 10 --
 include/linux/kgdb.h                        |  9 ++---
 include/linux/serial_core.h                 | 38 -
 kernel/debug/debug_core.c                   |  2 +-
 kernel/smp.c                                |  4 ++-
 18 files changed, 80 insertions(+), 53 deletions(-)

--
2.19.1.568.g152ad8e336-goog
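The "process sysrq at port unlock time" idea from the middle patches
can be sketched as follows. This is hypothetical, simplified driver
code (the function name is invented), not the actual helpers the
series adds to serial_core.h: the point is simply to remember the
magic character while the port lock is held and only act on it after
the lock is dropped, since a sysrq handler that prints to the console
would otherwise re-take the port lock and deadlock.

	#include <linux/serial_core.h>
	#include <linux/spinlock.h>
	#include <linux/sysrq.h>

	/* Hypothetical rx-character handler showing the deferred
	 * pattern; not code from the series itself. */
	static void example_uart_rx_char(struct uart_port *port,
					 unsigned int ch)
	{
		unsigned long flags;
		unsigned int sysrq_ch = 0;

		spin_lock_irqsave(&port->lock, flags);
		if (port->sysrq)	/* a BREAK was seen recently */
			sysrq_ch = ch;	/* defer: just note the char */
		/* ...normal rx path would push ch to the tty layer... */
		spin_unlock_irqrestore(&port->lock, flags);

		if (sysrq_ch)
			handle_sysrq(sysrq_ch);	/* lock no longer held */
	}

Since sysrq characters are rare, this keeps the hot path fully locked
and avoids the unlock/re-lock dance the 'msm_serial.c' approach does
on every character.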
[PATCH 7/7] kgdb: Remove irq flags and local_irq_enable/disable from roundup
The function kgdb_roundup_cpus() was passed a parameter that was
documented as:

> the flags that will be used when restoring the interrupts. There is
> local_irq_save() call before kgdb_roundup_cpus().

Nobody used those flags. Anyone who wanted to temporarily turn on
interrupts just did local_irq_enable() and local_irq_disable() without
looking at them. So we can definitely remove the flags.

Speaking of calling local_irq_enable(), it seems like a bad idea and
it caused a nice splat on my system with lockdep turned on.
Specifically it hit:
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)

See the previous patch in this series ("smp: Don't yell about IRQs
disabled in kgdb_roundup_cpus()") for more details, but the last few
things on the stack were this on my arm64 board:

  lockdep_hardirqs_on+0xf0/0x160
  trace_hardirqs_on+0x188/0x1ac
  kgdb_roundup_cpus+0x14/0x3c

As argued in the commit text of ("smp: Don't yell about IRQs disabled
in kgdb_roundup_cpus()"), it seems better to make smp_call_function()
lenient about kgdb than to locally turn on IRQs here. Thus let's
totally remove all the local_irq_enable() and local_irq_disable()
calls from all of the kgdb_roundup_cpus() calls.

Signed-off-by: Douglas Anderson
---
 arch/arc/kernel/kgdb.c     |  4 +---
 arch/arm/kernel/kgdb.c     |  4 +---
 arch/arm64/kernel/kgdb.c   |  4 +---
 arch/hexagon/kernel/kgdb.c | 11 ++-
 arch/mips/kernel/kgdb.c    |  4 +---
 arch/powerpc/kernel/kgdb.c |  2 +-
 arch/sh/kernel/kgdb.c      |  4 +---
 arch/sparc/kernel/smp_64.c |  2 +-
 arch/x86/kernel/kgdb.c     |  9 ++---
 include/linux/kgdb.h       |  9 ++---
 kernel/debug/debug_core.c  |  2 +-
 11 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index 9a3c34af2ae8..d94d3cb7f9e8 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -197,11 +197,9 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
-	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
 }
 
 struct kgdb_arch arch_kgdb_ops = {
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index caa0dbe3dc61..a80e9259f7e9 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -175,11 +175,9 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
-	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
 }
 
 static int __kgdb_notify(struct die_args *args, unsigned long cmd)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index a20de58061a8..5d171c26788f 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -289,11 +289,9 @@ static void kgdb_call_nmi_hook(void *ignored)
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
 }
 
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
 {
-	local_irq_enable();
 	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
-	local_irq_disable();
 }
 
 static int __kgdb_notify(struct die_args *args, unsigned long cmd)
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
index 16c24b22d0b2..30fbc491cf45 100644
--- a/arch/hexagon/kernel/kgdb.c
+++ b/arch/hexagon/kernel/kgdb.c
@@ -119,17 +119,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
 
 /**
  * kgdb_roundup_cpus - Get other CPUs into a holding pattern
- * @flags: Current IRQ state
  *
we need to get the attention of the other CPUs * and get them be in a known state. This should do what is needed * to get the other CPUs to call kgdb_wait(). Note that on some arches, * the NMI approach is not used for rounding up all the CPUs. For example, - * in case of MIPS, smp_call_function() is used to roundup CPUs. In - * this case, we have to make sure that interrupts are enabled before - * calling smp_call_function(). The argument to this function is - * the flags that will be used when restoring the interrupts. There is - * local_irq_save() call before kgdb_roundup_cpus(). + * in case of MIPS, smp_call_function() is used to roundup CPUs. * * On non-SMP systems, this is not called. */ @@ -139,11 +134,9 @@ static void hexagon_kgdb_nmi_hook(void *ignored) kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); } -void kgdb_roundup_cpus(unsigned long flags) +void kgdb_roundup_cpus(void) { - local_irq_enable(); smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0); - local_irq_disable(); } #endif diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index eb6c0d582626..6671a279966f 100644 --- a/arch/mips/kernel/kgd
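To see why the flags parameter was dead weight, here is a minimal
sketch of the calling pattern the commit message describes. The names
below are hypothetical (the real call site is in
kernel/debug/debug_core.c, whose hunk is truncated above): the debug
core saves and restores IRQ state itself, so nothing below it ever
needed the flags.

	/*
	 * Hypothetical sketch only -- not the actual debug_core code.
	 * The debug core does local_irq_save() before the roundup, so
	 * the flags it used to pass down were never consumed by any
	 * arch's kgdb_roundup_cpus() implementation.
	 */
	static void debugger_entry_sketch(void)
	{
		unsigned long flags;

		local_irq_save(flags);	/* IRQs stay off across the roundup */
		kgdb_roundup_cpus();	/* was: kgdb_roundup_cpus(flags) */
		/* ... debugger main loop runs here ... */
		local_irq_restore(flags);
	}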
[PATCH 6/7] smp: Don't yell about IRQs disabled in kgdb_roundup_cpus()
In kgdb_roundup_cpus() we've got code that looks like:

  local_irq_enable();
  smp_call_function(kgdb_call_nmi_hook, NULL, 0);
  local_irq_disable();

In certain cases when we drop into kgdb (like with sysrq-g on a serial
console) we'll get a big yell that looks like:

  sysrq: SysRq : DEBUG
  ------------[ cut here ]------------
  DEBUG_LOCKS_WARN_ON(current->hardirq_context)
  WARNING: CPU: 0 PID: 0 at .../kernel/locking/lockdep.c:2875 lockdep_hardirqs_on+0xf0/0x160
  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #27
  pstate: 604003c9 (nZCv DAIF +PAN -UAO)
  pc : lockdep_hardirqs_on+0xf0/0x160
  ...
  Call trace:
   lockdep_hardirqs_on+0xf0/0x160
   trace_hardirqs_on+0x188/0x1ac
   kgdb_roundup_cpus+0x14/0x3c
   kgdb_cpu_enter+0x53c/0x5cc
   kgdb_handle_exception+0x180/0x1d4
   kgdb_compiled_brk_fn+0x30/0x3c
   brk_handler+0x134/0x178
   do_debug_exception+0xfc/0x178
   el1_dbg+0x18/0x78
   kgdb_breakpoint+0x34/0x58
   sysrq_handle_dbg+0x54/0x5c
   __handle_sysrq+0x114/0x21c
   handle_sysrq+0x30/0x3c
   qcom_geni_serial_isr+0x2dc/0x30c
  ...
  ...
  irq event stamp: ...45
  hardirqs last enabled at (...44): [...] __do_softirq+0xd8/0x4e4
  hardirqs last disabled at (...45): [...] el1_irq+0x74/0x130
  softirqs last enabled at (...42): [...] _local_bh_enable+0x2c/0x34
  softirqs last disabled at (...43): [...] irq_exit+0xa8/0x100
  ---[ end trace adf21f830c46e638 ]---

Let's add kgdb to the list of reasons not to warn in
smp_call_function_many(). That will allow us (in a future patch) to
stop calling local_irq_enable(), which will get rid of the original
splat.

NOTE: with this change comes the obvious question: will we start
deadlocking more often now when we drop into the debugger? I can't say
that for sure one way or the other, but the fact that we do the same
logic for "oops_in_progress" makes me feel like it shouldn't happen too
often. Also note that the old logic of turning on interrupts
temporarily wasn't exactly safe since (I presume) that could have
violated spin_lock_irqsave() semantics and ended up with a deadlock of
its own.

Signed-off-by: Douglas Anderson
---
 kernel/smp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 163c451af42e..bb581e58c8dc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include

 #include "smpboot.h"

@@ -413,7 +414,8 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * can't happen.
 	 */
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-		     && !oops_in_progress && !early_boot_irqs_disabled);
+		     && !oops_in_progress && !early_boot_irqs_disabled
+		     && !in_dbg_master());

 	/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
-- 
2.19.1.568.g152ad8e336-goog
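To make the spin_lock_irqsave() concern in the NOTE above concrete,
here is a hedged sketch of how the old temporary IRQ-on window could
deadlock on its own. The lock and function names are demo-only
inventions, not code from this series; the scenario assumes the
debugger is entered while some irqsave lock is held, which a
breakpoint can always make happen.

	/* Demo-only lock for illustration; not part of this series. */
	static DEFINE_SPINLOCK(demo_lock);

	static void demo_self_deadlock(void)
	{
		unsigned long flags;

		/* Caller expects IRQs to stay off until the restore below. */
		spin_lock_irqsave(&demo_lock, flags);

		/*
		 * If kgdb is entered here, the old roundup pattern
		 * re-enabled IRQs.  An interrupt handler on this CPU
		 * that takes demo_lock would then spin on a lock its
		 * own CPU already holds -- exactly the self-deadlock
		 * the irqsave variant exists to prevent.
		 */
		local_irq_enable();
		/* smp_call_function(kgdb_call_nmi_hook, NULL, 0); */
		local_irq_disable();

		spin_unlock_irqrestore(&demo_lock, flags);
	}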