Keith Owens wrote:
> 
> Just because the traces end up in stext_lock does not mean that they
> are the same bug.  Locks are optimized for pipeline performance, the
> code for "got the lock" is in the main text section, the code for
> "cannot get lock, need to wait" is moved to a separate text section.
> That way only the failure case gets pipeline stalls.
> 
> The downside of this optimization is that all code that is waiting for
> a lock appears to be in the out of line section and the only label in
> that section is right at the start.  So all lock code appears to be in
> stext_lock.  What really matters is where the stext_lock code jumps
> back to, that tells you which code is waiting for the lock.  In this
> case you have jumps back to blk_get_queue+9/60 so it is waiting on
> io_request_lock.  Now all you have to do is work out who is holding
> onto io_request_lock.

What do you think of this addition to the x86 debug code, Keith:

If you get an NMI oops it says:

        NMI Watchdog detected LOCKUP on CPU1, registers:
        CPU:    1
        EIP:    0010:[<c01fe212>]
        EFLAGS: 00000086
        Waiting on spinlock! Spinner's EIP is [<c0130d7a>]
        ...


And if you get a spindeadlock with interrupts enabled
and hit ALT-SYSRQ-P it says:

        CPU:1                                           << This is new, as well.
        EIP: 0010:[<c01fe1b7>]  EFLAGS: 00000286
        Waiting on spinlock! Spinner's EIP is [<c0130d7a>]
        EAX: 0000000f EBX: cded5fbc ECX: 00000002 EDX: 000008fc
        ESI: 00000000 EDI: 00000000 EBP: bffff5e4 DS: 0018 ES: 0018
        CR0: 8005003b CR2: 4000a820 CR3: 0e0fc000 CR4: 00000090


Doing this for rwlocks is, umm, trickier.

Will ksymoops automatically parse the EIP?




--- linux-2.4.0-test9-pre4/arch/i386/vmlinux.lds        Tue Jul 11 22:21:12 2000
+++ linux-akpm/arch/i386/vmlinux.lds    Wed Sep 20 20:58:25 2000
@@ -13,7 +13,9 @@
        *(.fixup)
        *(.gnu.warning)
        } = 0x9090
+  __start___text_lock = .;
   .text.lock : { *(.text.lock) }       /* out-of-line lock text */
+  __stop___text_lock = .;
   .rodata : { *(.rodata) }
   .kstrtab : { *(.kstrtab) }
 
--- linux-2.4.0-test9-pre4/arch/i386/kernel/traps.c     Tue Sep 19 19:42:08 2000
+++ linux-akpm/arch/i386/kernel/traps.c Wed Sep 20 23:49:22 2000
@@ -152,6 +152,40 @@
        }
 }
 
+#ifdef CONFIG_SMP
+void show_spinlock_owner(struct pt_regs *regs)
+{
+       extern int __start___text_lock, __stop___text_lock;
+       u8 *start_lock = (u8 *)&__start___text_lock;
+       u8 *stop_lock = (u8 *)&__stop___text_lock;
+       u8 *eip = (u8 *)regs->eip;
+       u8 *last_eip = eip + 16;                /* size of spin_lock_string */
+
+       if (last_eip > stop_lock)
+               last_eip = stop_lock;
+
+       last_eip -= 5;                  /* size of `jmp xxx' */
+
+       /*
+        * Search for the start of the jmp back.  There may be
+        * more than one 0xe9 opcode in there, but we only dump
+        * the first one (which may be wrong).
+        */
+       while (eip >= start_lock && eip < last_eip) {
+               if (*eip == 0xe9) {     /* relative jmp */
+                       u8 *back = *(u8 **)(eip + 1);
+                       back += (u32)(eip + 5);
+                       printk("Waiting on spinlock! Spinner's EIP is [<%p>]\n", back);
+                       break;
+               }
+               eip++;
+       }
+}
+#else
+void show_spinlock_owner(struct pt_regs *regs)
+{}
+#endif
+
 static void show_registers(struct pt_regs *regs)
 {
        int i;
@@ -168,6 +202,7 @@
        }
        printk("CPU:    %d\nEIP:    %04x:[<%08lx>]\nEFLAGS: %08lx\n",
                smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
+       show_spinlock_owner(regs);
        printk("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
                regs->eax, regs->ebx, regs->ecx, regs->edx);
        printk("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
@@ -396,7 +431,7 @@
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
-extern spinlock_t console_lock;
+extern spinlock_t console_lock, timerlist_lock;
 static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
 
 inline void nmi_watchdog_tick(struct pt_regs * regs)
@@ -411,7 +446,8 @@
         *
         * since NMIs dont listen to _any_ locks, we have to be extremely
         * careful not to rely on unsafe variables. The printk might lock
-        * up though, so we have to break up console_lock first ...
+        * up though, so we have to break up console_lock and timerlist_lock
+        * first ...
         * [when there will be more tty-related locks, break them up
         *  here too!]
         */
@@ -441,6 +477,8 @@
                         */
                        spin_trylock(&console_lock);
                        spin_unlock(&console_lock);
+                       spin_trylock(&timerlist_lock);
+                       spin_unlock(&timerlist_lock);
                        printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", 
cpu);
                        show_registers(regs);
                        printk("console shuts up ...\n");
--- linux-2.4.0-test9-pre4/arch/i386/kernel/process.c   Sat Sep  9 16:19:22 2000
+++ linux-akpm/arch/i386/kernel/process.c       Wed Sep 20 23:54:31 2000
@@ -380,15 +380,18 @@
 }
 
 
+extern void show_spinlock_owner(struct pt_regs *regs);
+
 void show_regs(struct pt_regs * regs)
 {
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
 
        printk("\n");
-       printk("EIP: %04x:[<%08lx>]",0xffff & regs->xcs,regs->eip);
+       printk("CPU: %d\nEIP: %04x:[<%08lx>]", smp_processor_id(), 0xffff & 
+regs->xcs,regs->eip);
        if (regs->xcs & 3)
                printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
        printk(" EFLAGS: %08lx\n",regs->eflags);
+       show_spinlock_owner(regs);
        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
                regs->eax,regs->ebx,regs->ecx,regs->edx);
        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
Please read the FAQ at http://www.tux.org/lkml/

Reply via email to