Cliff Wickman wrote:
Hi Cliff,
> Hi Keith,
>
> On Wed, Oct 29, 2008 at 03:57:25PM +1100, Keith Owens wrote:
>> However there is a separate problem with your patch. You now wait in
>> smp_kdb_stop() until all cpus are in KDB. If any cpu is completely
>> hung so it cannot be interrupted then smp_kdb_stop() will never return
>> and KDB will now appear to hang.
>>
>> The existing code avoids this by
>>
>> kdb() -> smp_kdb_stop() - issue KDB_VECTOR as normal interrupt but do not
>> wait for cpus
>> kdb() -> kdba_main_loop()
>> kdba_main_loop() -> kdb_save_running()
>> kdb_save_running() -> kdb_main_loop()
>> kdb_main_loop() -> kdb_wait_for_cpus()
>>
>> kdb_wait_for_cpus() waits until the other cpus are in KDB. If a cpu
>> does not respond to KDB_VECTOR after a few seconds then
>> kdb_wait_for_cpus() hits the missing cpus with NMI.
>>
>> This two step approach (send KDB_VECTOR as normal interrupt, wait then
>> send NMI) is used because NMI can be serviced at any time, even when
>> the target cpu is in the middle of servicing an interrupt. This can
>> result in incomplete register state which leads to broken backtraces.
>> IOW, sending NMI first would actually make debugging harder.
>>
>> Given the above logic, if you are going to take over an existing
>> interrupt vector then the vector needs to be acquired near the start of
>> kdb() and released near the end of kdb(), and only on the master cpu.
>>
>> Note: there is no overwhelming need for KDB_VECTOR to have a high
>> priority. As long as it is received within a few seconds then all is
>> well.
>
> Thanks for the explanation. I see your point.
I will let Keith to comment on the logic of your code, but this patch
will cause IA64 compilation to fail because kdb_giveback_vector()
is not defined for IA64.
Suggestions:
1) change kdb_takeover_vector and kdb_giveback_vector to arch-dependent
version of kdba_takeover_vector and kdba_giveback_vector.
2) extern of kdba_giveback_vector should be moved to arch-dependent
kdb.h (ie, arch/{ia64,x86}/include/asm/kdb.h.) and the ia64 version
to be a dummy define.
3) kdbmain.c should change accordingly.
Thanks,
jay
>
> How about if we keep the two step approach, but take over the vector
> when we need it, in step one. Then give it back when the step two
> wait is over.
> (assuming we don't take over a vector needed for the NMI)
>
> Like this:
>
> ---
> arch/x86/kdb/kdbasupport_32.c | 22 ++++++++++++++++++----
> arch/x86/kdb/kdbasupport_64.c | 23 +++++++++++++++++++----
> include/asm-x86/irq_vectors.h | 11 ++++++-----
> include/linux/kdb.h | 1 +
> kdb/kdbmain.c | 2 ++
> 5 files changed, 46 insertions(+), 13 deletions(-)
>
> Index: 081002.linus/arch/x86/kdb/kdbasupport_32.c
> ===================================================================
> --- 081002.linus.orig/arch/x86/kdb/kdbasupport_32.c
> +++ 081002.linus/arch/x86/kdb/kdbasupport_32.c
> @@ -883,9 +883,6 @@ kdba_cpu_up(void)
> static int __init
> kdba_arch_init(void)
> {
> -#ifdef CONFIG_SMP
> - set_intr_gate(KDB_VECTOR, kdb_interrupt);
> -#endif
> set_intr_gate(KDBENTER_VECTOR, kdb_call);
> return 0;
> }
> @@ -1027,14 +1024,31 @@ kdba_verify_rw(unsigned long addr, size_
>
> #include <mach_ipi.h>
>
> +gate_desc save_idt[NR_VECTORS];
> +
> +void kdb_takeover_vector(int vector)
> +{
> + memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
> + set_intr_gate(KDB_VECTOR, kdb_interrupt);
> + return;
> +}
> +
> +void kdb_giveback_vector(int vector)
> +{
> + native_write_idt_entry(idt_table, vector, &save_idt[vector]);
> + return;
> +}
> +
> /* When first entering KDB, try a normal IPI. That reduces backtrace
> problems
> * on the other cpus.
> */
> void
> smp_kdb_stop(void)
> {
> - if (!KDB_FLAG(NOIPI))
> + if (!KDB_FLAG(NOIPI)) {
> + kdb_takeover_vector(KDB_VECTOR);
> send_IPI_allbutself(KDB_VECTOR);
> + }
> }
>
> /* The normal KDB IPI handler */
> Index: 081002.linus/arch/x86/kdb/kdbasupport_64.c
> ===================================================================
> --- 081002.linus.orig/arch/x86/kdb/kdbasupport_64.c
> +++ 081002.linus/arch/x86/kdb/kdbasupport_64.c
> @@ -21,6 +21,7 @@
> #include <linux/interrupt.h>
> #include <linux/module.h>
> #include <linux/kdebug.h>
> +#include <linux/cpumask.h>
> #include <asm/processor.h>
> #include <asm/msr.h>
> #include <asm/uaccess.h>
> @@ -900,9 +901,6 @@ kdba_cpu_up(void)
> static int __init
> kdba_arch_init(void)
> {
> -#ifdef CONFIG_SMP
> - set_intr_gate(KDB_VECTOR, kdb_interrupt);
> -#endif
> set_intr_gate(KDBENTER_VECTOR, kdb_call);
> return 0;
> }
> @@ -976,14 +974,31 @@ kdba_set_current_task(const struct task_
>
> #include <mach_ipi.h>
>
> +gate_desc save_idt[NR_VECTORS];
> +
> +void kdb_takeover_vector(int vector)
> +{
> + memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
> + set_intr_gate(KDB_VECTOR, kdb_interrupt);
> + return;
> +}
> +
> +void kdb_giveback_vector(int vector)
> +{
> + native_write_idt_entry(idt_table, vector, &save_idt[vector]);
> + return;
> +}
> +
> /* When first entering KDB, try a normal IPI. That reduces backtrace
> problems
> * on the other cpus.
> */
> void
> smp_kdb_stop(void)
> {
> - if (!KDB_FLAG(NOIPI))
> + if (!KDB_FLAG(NOIPI)) {
> + kdb_takeover_vector(KDB_VECTOR);
> send_IPI_allbutself(KDB_VECTOR);
> + }
> }
>
> /* The normal KDB IPI handler */
> Index: 081002.linus/include/asm-x86/irq_vectors.h
> ===================================================================
> --- 081002.linus.orig/include/asm-x86/irq_vectors.h
> +++ 081002.linus/include/asm-x86/irq_vectors.h
> @@ -66,7 +66,6 @@
> # define RESCHEDULE_VECTOR 0xfc
> # define CALL_FUNCTION_VECTOR 0xfb
> # define CALL_FUNCTION_SINGLE_VECTOR 0xfa
> -#define KDB_VECTOR 0xf9
> # define THERMAL_APIC_VECTOR 0xf0
>
> #else
> @@ -79,10 +78,6 @@
> #define THERMAL_APIC_VECTOR 0xfa
> #define THRESHOLD_APIC_VECTOR 0xf9
> #define UV_BAU_MESSAGE 0xf8
> -/* Overload KDB_VECTOR with UV_BAU_MESSAGE. By the time the UV hardware is
> - * ready, we should have moved to a dynamically allocated vector scheme.
> - */
> -#define KDB_VECTOR 0xf8
> #define INVALIDATE_TLB_VECTOR_END 0xf7
> #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
>
> @@ -91,6 +86,12 @@
> #endif
>
> /*
> + * KDB_VECTOR will take over vector 0xfe when it is needed, as in theory
> + * it should not be used anyway.
> + */
> +#define KDB_VECTOR 0xfe
> +
> +/*
> * Local APIC timer IRQ vector is on a different priority level,
> * to work around the 'lost local interrupt if more than 2 IRQ
> * sources per level' errata.
> Index: 081002.linus/include/linux/kdb.h
> ===================================================================
> --- 081002.linus.orig/include/linux/kdb.h
> +++ 081002.linus/include/linux/kdb.h
> @@ -89,6 +89,7 @@ extern volatile int kdb_flags; /* Glob
>
> extern void kdb_save_flags(void);
> extern void kdb_restore_flags(void);
> +extern void kdb_giveback_vector(int);
>
> #define KDB_FLAG(flag) (kdb_flags & KDB_FLAG_##flag)
> #define KDB_FLAG_SET(flag) ((void)(kdb_flags |= KDB_FLAG_##flag))
> Index: 081002.linus/kdb/kdbmain.c
> ===================================================================
> --- 081002.linus.orig/kdb/kdbmain.c
> +++ 081002.linus/kdb/kdbmain.c
> @@ -1673,6 +1673,8 @@ kdb_wait_for_cpus(void)
> wait == 1 ? " is" : "s are",
> wait == 1 ? "its" : "their");
> }
> + /* give back the vector we took over in smp_kdb_stop */
> + kdb_giveback_vector(KDB_VECTOR);
> #endif /* CONFIG_SMP */
> }
>
---------------------------
Use http://oss.sgi.com/ecartis to modify your settings or to unsubscribe.