Hi Keith,

On Wed, Oct 29, 2008 at 03:57:25PM +1100, Keith Owens wrote:
> However there is a separate problem with your patch.  You now wait in
> smp_kdb_stop() until all cpus are in KDB.  If any cpu is completely
> hung so it cannot be interrupted then smp_kdb_stop() will never return
> and KDB will now appear to hang.
> 
> The existing code avoids this by
> 
>   kdb() -> smp_kdb_stop() - issue KDB_VECTOR as normal interrupt but do not 
> wait for cpus
>   kdb() -> kdba_main_loop()
>     kdba_main_loop() -> kdb_save_running()
>       kdb_save_running() -> kdb_main_loop()
>         kdb_main_loop() -> kdb_wait_for_cpus()
> 
> kdb_wait_for_cpus() waits until the other cpus are in KDB.  If a cpu
> does not respond to KDB_VECTOR after a few seconds then
> kdb_wait_for_cpus() hits the missing cpus with NMI.
> 
> This two step approach (send KDB_VECTOR as normal interrupt, wait then
> send NMI) is used because NMI can be serviced at any time, even when
> the target cpu is in the middle of servicing an interrupt.  This can
> result in incomplete register state which leads to broken backtraces.
> IOW, sending NMI first would actually make debugging harder.
> 
> Given the above logic, if you are going to take over an existing
> interrupt vector then the vector needs to be acquired near the start of
> kdb() and released near the end of kdb(), and only on the master cpu.
> 
> Note: there is no overwhelming need for KDB_VECTOR to have a high
> priority.  As long as it is received within a few seconds then all is
> well.

Thanks for the explanation.  I see your point.

How about if we keep the two step approach, but take over the vector
when we need it, in step one.  Then give it back when the step two
 wait is over.
(assuming we don't take over a vector needed for the NMI)

Like this:

---
 arch/x86/kdb/kdbasupport_32.c |   22 ++++++++++++++++++----
 arch/x86/kdb/kdbasupport_64.c |   23 +++++++++++++++++++----
 include/asm-x86/irq_vectors.h |   11 ++++++-----
 include/linux/kdb.h           |    1 +
 kdb/kdbmain.c                 |    2 ++
 5 files changed, 46 insertions(+), 13 deletions(-)

Index: 081002.linus/arch/x86/kdb/kdbasupport_32.c
===================================================================
--- 081002.linus.orig/arch/x86/kdb/kdbasupport_32.c
+++ 081002.linus/arch/x86/kdb/kdbasupport_32.c
@@ -883,9 +883,6 @@ kdba_cpu_up(void)
 static int __init
 kdba_arch_init(void)
 {
-#ifdef CONFIG_SMP
-       set_intr_gate(KDB_VECTOR, kdb_interrupt);
-#endif
        set_intr_gate(KDBENTER_VECTOR, kdb_call);
        return 0;
 }
@@ -1027,14 +1024,31 @@ kdba_verify_rw(unsigned long addr, size_
 
 #include <mach_ipi.h>
 
+gate_desc save_idt[NR_VECTORS];
+
+void kdb_takeover_vector(int vector)
+{
+       memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
+       set_intr_gate(KDB_VECTOR, kdb_interrupt);
+       return;
+}
+
+void kdb_giveback_vector(int vector)
+{
+       native_write_idt_entry(idt_table, vector, &save_idt[vector]);
+       return;
+}
+
 /* When first entering KDB, try a normal IPI.  That reduces backtrace problems
  * on the other cpus.
  */
 void
 smp_kdb_stop(void)
 {
-       if (!KDB_FLAG(NOIPI))
+       if (!KDB_FLAG(NOIPI)) {
+               kdb_takeover_vector(KDB_VECTOR);
                send_IPI_allbutself(KDB_VECTOR);
+       }
 }
 
 /* The normal KDB IPI handler */
Index: 081002.linus/arch/x86/kdb/kdbasupport_64.c
===================================================================
--- 081002.linus.orig/arch/x86/kdb/kdbasupport_64.c
+++ 081002.linus/arch/x86/kdb/kdbasupport_64.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/kdebug.h>
+#include <linux/cpumask.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/uaccess.h>
@@ -900,9 +901,6 @@ kdba_cpu_up(void)
 static int __init
 kdba_arch_init(void)
 {
-#ifdef CONFIG_SMP
-       set_intr_gate(KDB_VECTOR, kdb_interrupt);
-#endif
        set_intr_gate(KDBENTER_VECTOR, kdb_call);
        return 0;
 }
@@ -976,14 +974,31 @@ kdba_set_current_task(const struct task_
 
 #include <mach_ipi.h>
 
+gate_desc save_idt[NR_VECTORS];
+
+void kdb_takeover_vector(int vector)
+{
+       memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
+       set_intr_gate(KDB_VECTOR, kdb_interrupt);
+       return;
+}
+
+void kdb_giveback_vector(int vector)
+{
+       native_write_idt_entry(idt_table, vector, &save_idt[vector]);
+       return;
+}
+
 /* When first entering KDB, try a normal IPI.  That reduces backtrace problems
  * on the other cpus.
  */
 void
 smp_kdb_stop(void)
 {
-       if (!KDB_FLAG(NOIPI))
+       if (!KDB_FLAG(NOIPI)) {
+               kdb_takeover_vector(KDB_VECTOR);
                send_IPI_allbutself(KDB_VECTOR);
+       }
 }
 
 /* The normal KDB IPI handler */
Index: 081002.linus/include/asm-x86/irq_vectors.h
===================================================================
--- 081002.linus.orig/include/asm-x86/irq_vectors.h
+++ 081002.linus/include/asm-x86/irq_vectors.h
@@ -66,7 +66,6 @@
 # define RESCHEDULE_VECTOR             0xfc
 # define CALL_FUNCTION_VECTOR          0xfb
 # define CALL_FUNCTION_SINGLE_VECTOR   0xfa
-#define        KDB_VECTOR                      0xf9
 # define THERMAL_APIC_VECTOR           0xf0
 
 #else
@@ -79,10 +78,6 @@
 #define THERMAL_APIC_VECTOR            0xfa
 #define THRESHOLD_APIC_VECTOR          0xf9
 #define UV_BAU_MESSAGE                 0xf8
-/* Overload KDB_VECTOR with UV_BAU_MESSAGE. By the time the UV hardware is
- * ready, we should have moved to a dynamically allocated vector scheme.
- */
-#define KDB_VECTOR                     0xf8
 #define INVALIDATE_TLB_VECTOR_END      0xf7
 #define INVALIDATE_TLB_VECTOR_START    0xf0    /* f0-f7 used for TLB flush */
 
@@ -91,6 +86,12 @@
 #endif
 
 /*
+ * KDB_VECTOR will take over vector 0xfe when it is needed, as in theory
+ * it should not be used anyway.
+ */
+#define KDB_VECTOR                     0xfe
+
+/*
  * Local APIC timer IRQ vector is on a different priority level,
  * to work around the 'lost local interrupt if more than 2 IRQ
  * sources per level' errata.
Index: 081002.linus/include/linux/kdb.h
===================================================================
--- 081002.linus.orig/include/linux/kdb.h
+++ 081002.linus/include/linux/kdb.h
@@ -89,6 +89,7 @@ extern volatile int kdb_flags;                        /* Glob
 
 extern void kdb_save_flags(void);
 extern void kdb_restore_flags(void);
+extern void kdb_giveback_vector(int);
 
 #define KDB_FLAG(flag)         (kdb_flags & KDB_FLAG_##flag)
 #define KDB_FLAG_SET(flag)     ((void)(kdb_flags |= KDB_FLAG_##flag))
Index: 081002.linus/kdb/kdbmain.c
===================================================================
--- 081002.linus.orig/kdb/kdbmain.c
+++ 081002.linus/kdb/kdbmain.c
@@ -1673,6 +1673,8 @@ kdb_wait_for_cpus(void)
                                        wait == 1 ? " is" : "s are",
                                        wait == 1 ? "its" : "their");
        }
+       /* give back the vector we took over in smp_kdb_stop */
+       kdb_giveback_vector(KDB_VECTOR);
 #endif /* CONFIG_SMP */
 }
 
-- 
Cliff Wickman
Silicon Graphics, Inc.
[EMAIL PROTECTED]
(651) 683-3824
---------------------------
Use http://oss.sgi.com/ecartis to modify your settings or to unsubscribe.

Reply via email to