Paul Mackerras wrote: > Kamalesh Babulal writes: > >> The SHA1 ID of the kernel is 0e81a8ae37687845f7cdfa2adce14ea6a5f1dd34 >> (2.6.25-rc8) >> and the source seems to have the patch >> 44387e9ff25267c78a99229aca55ed750e9174c7. >> >> The kernel was patched only the patch you gave me >> (http://lkml.org/lkml/2008/4/8/42). > > Please try again with both that patch and the one below. Once again > it won't fix the bug but will give us more information. When the oops > occurs, the kernel will print a lot of debug information that should > help locate the problem. > > Paul. > > diff --git a/arch/powerpc/kernel/asm-offsets.c > b/arch/powerpc/kernel/asm-offsets.c > index e932b43..f16db50 100644 > --- a/arch/powerpc/kernel/asm-offsets.c > +++ b/arch/powerpc/kernel/asm-offsets.c > @@ -144,6 +144,9 @@ int main(void) > DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); > DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); > DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); > + DEFINE(PACASLBLOG, offsetof(struct paca_struct, slblog)); > + DEFINE(PACASLBLOGIX, offsetof(struct paca_struct, slblog_ix)); > + DEFINE(PACALASTSLB, offsetof(struct paca_struct, last_slb)); > > DEFINE(SLBSHADOW_STACKVSID, > offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S > index 148a354..663df17 100644 > --- a/arch/powerpc/kernel/entry_64.S > +++ b/arch/powerpc/kernel/entry_64.S > @@ -419,6 +419,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) > slbmte r7,r0 > isync > > + ld r4,PACASLBLOGIX(r13) > + addi r4,r4,1 > + clrldi r4,r4,64-6 > + std r4,PACASLBLOGIX(r13) > + add r4,r4,r13 > + addi r4,r4,PACASLBLOG > + li r5,4 > + std r5,0(r4) > + mftb r5 > + std r5,8(r4) > + std r6,16(r4) > + std r0,24(r4) > 2: > clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ > /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE > @@ -533,6 +545,17 @@ 
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) > > stdcx. r0,0,r1 /* to clear the reservation */ > > + li r4,0 > + slbmfee r2,r4 > + std r2,PACALASTSLB(r13) > + slbmfev r2,r4 > + std r2,PACALASTSLB+8(r13) > + li r4,1 > + slbmfee r2,r4 > + std r2,PACALASTSLB+16(r13) > + slbmfev r2,r4 > + std r2,PACALASTSLB+24(r13) > + > /* > * Clear RI before restoring r13. If we are returning to > * userspace and we take an exception after restoring r13, > diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c > index 4b5b7ff..c918f33 100644 > --- a/arch/powerpc/kernel/traps.c > +++ b/arch/powerpc/kernel/traps.c > @@ -1141,6 +1141,40 @@ void SPEFloatingPointException(struct pt_regs *regs) > } > #endif > > +static void dump_unrecov_slb(void) > +{ > +#ifdef CONFIG_PPC64 > + long entry, rstart; > + unsigned long esid, vsid; > + > + printk(KERN_EMERG "SLB contents now:\n"); > + for (entry = 0; entry < 64; ++entry) { > + asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (entry)); > + if (esid == 0) > + /* valid bit is clear along with everything else */ > + continue; > + asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (entry)); > + printk(KERN_EMERG "%d: %.16lx %.16lx\n", entry, esid, vsid); > + } > + > + printk(KERN_EMERG "SLB 0-1 at last exception exit:\n"); > + printk(KERN_EMERG "0: %.16lx %.16lx\n", get_paca()->last_slb[0][0], > + get_paca()->last_slb[0][1]); > + printk(KERN_EMERG "1: %.16lx %.16lx\n", get_paca()->last_slb[1][0], > + get_paca()->last_slb[1][1]); > + printk(KERN_EMERG "SLB update log:\n"); > + rstart = entry = get_paca()->slblog_ix; > + do { > + printk(KERN_EMERG "%d: %lx %lx %.16lx %.16lx\n", entry, > + get_paca()->slblog[entry][0], > + get_paca()->slblog[entry][1], > + get_paca()->slblog[entry][2], > + get_paca()->slblog[entry][3]); > + entry = (entry + 1) % 63; > + } while (entry != rstart); > +#endif > +} > + > /* > * We enter here if we get an unrecoverable exception, that is, one > * that happened at a point where the RI (recoverable interrupt) 
bit > @@ -1151,6 +1185,8 @@ void unrecoverable_exception(struct pt_regs *regs) > { > printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", > regs->trap, regs->nip); > + if (regs->trap == 0x4100) > + dump_unrecov_slb(); > die("Unrecoverable exception", regs, SIGABRT); > } > > diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c > index 906daed..235edf7 100644 > --- a/arch/powerpc/mm/slb.c > +++ b/arch/powerpc/mm/slb.c > @@ -105,6 +105,7 @@ void slb_flush_and_rebolt(void) > * appropriately too. */ > unsigned long linear_llp, vmalloc_llp, lflags, vflags; > unsigned long ksp_esid_data, ksp_vsid_data; > + long logix; > > WARN_ON(!irqs_disabled()); > > @@ -144,6 +145,13 @@ void slb_flush_and_rebolt(void) > "r"(ksp_vsid_data), > "r"(ksp_esid_data) > : "memory"); > + logix = get_paca()->slblog_ix; > + logix = (logix + 1) & 63; > + get_paca()->slblog_ix = logix; > + get_paca()->slblog[logix][0] = 3; > + get_paca()->slblog[logix][1] = mftb(); > + get_paca()->slblog[logix][2] = ksp_esid_data; > + get_paca()->slblog[logix][3] = ksp_vsid_data; > } > > void slb_vmalloc_update(void) > @@ -192,6 +200,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct > *mm) > unsigned long pc = KSTK_EIP(tsk); > unsigned long stack = KSTK_ESP(tsk); > unsigned long unmapped_base; > + long logix; > > if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && > offset <= SLB_CACHE_ENTRIES) { > @@ -204,6 +213,14 @@ void switch_slb(struct task_struct *tsk, struct > mm_struct *mm) > << SLBIE_SSIZE_SHIFT; > slbie_data |= SLBIE_C; /* C set for user addresses */ > asm volatile("slbie %0" : : "r" (slbie_data)); > + > + logix = get_paca()->slblog_ix; > + logix = (logix + 1) & 63; > + get_paca()->slblog_ix = logix; > + get_paca()->slblog[logix][0] = 2; > + get_paca()->slblog[logix][1] = mftb(); > + get_paca()->slblog[logix][2] = slbie_data; > + get_paca()->slblog[logix][3] = 0; > } > asm volatile("isync" : : : "memory"); > } else { > diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S > 
index 657f6b3..8c7ce20 100644 > --- a/arch/powerpc/mm/slb_low.S > +++ b/arch/powerpc/mm/slb_low.S > @@ -249,6 +249,20 @@ _GLOBAL(slb_compare_rr_to_size) > */ > slbmte r11,r10 > > + ld r3,PACASLBLOGIX(r13) > + addi r3,r3,1 > + clrldi r3,r3,64-6 > + std r3,PACASLBLOGIX(r13) > + sldi r3,r3,5 > + add r3,r3,r13 > + addi r3,r3,PACASLBLOG > + li r9,1 > + std r9,0(r3) > + mftb r9 > + std r9,8(r3) > + std r11,16(r3) > + std r10,24(r3) > + > /* we're done for kernel addresses */ > crclr 4*cr0+eq /* set result to "success" */ > bgelr cr7 > diff --git a/arch/powerpc/platforms/pseries/ras.c > b/arch/powerpc/platforms/pseries/ras.c > index a1ab25c..959ef26 100644 > --- a/arch/powerpc/platforms/pseries/ras.c > +++ b/arch/powerpc/platforms/pseries/ras.c > @@ -325,6 +325,8 @@ static int recover_mce(struct pt_regs *regs, struct > rtas_error_log * err) > > if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { > /* Platform corrected itself */ > + printk(KERN_ERR "FWNMI: platform corrected error %.16lx\n", > + *(unsigned long *)err); > nonfatal = 1; > } else if ((regs->msr & MSR_RI) && > user_mode(regs) && > diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h > index 748b35a..6280b82 100644 > --- a/include/asm-powerpc/paca.h > +++ b/include/asm-powerpc/paca.h > @@ -115,6 +115,11 @@ struct paca_struct { > u64 system_time; /* accumulated system TB ticks */ > u64 startpurr; /* PURR/TB value snapshot */ > u64 startspurr; /* SPURR value snapshot */ > + > + /* SLB update log */ > + long slblog_ix; > + u64 slblog[64][4]; > + u64 last_slb[2][2]; > }; > > extern struct paca_struct paca[]; Hi Paul,
After applying both the patch above and the patch posted at http://lkml.org/lkml/2008/4/8/42, the bug reproduced and the kernel printed the following information: Unrecoverable exception 4100 at c000000000008d4c SLB contents now: 0: c000000008000000 0000408f92c94500 1: d000000008000000 0000f09b89af5400 2: c000000020000000 0000420e6f8ca500 3: 0000000010000000 0000947fa10bac80 4: 00000000f0000000 00009ef7aa634c80 5: 0000000040000000 000096bdec30bc80 8: 00000000f0000000 00002292895c1c80 9: 0000000040000000 00001a58cb298c80 10: 0000000010000000 0000181a80047c80 12: 00000000f0000000 0000273e59afdc80 13: 0000000040000000 00001f049b7d4c80 14: 0000000010000000 00001cc650583c80 16: 00000000f0000000 00007bbb0a7b3c80 17: 0000000040000000 000073814c48ac80 18: 0000000010000000 0000714301239c80 20: 00000000f0000000 00009ef7aa634c80 21: 0000000040000000 000096bdec30bc80 22: 0000000010000000 0000947fa10bac80 23: c000000718000000 0000950f4be7f500 24: c000000728000000 000095ceba49a500 25: cf00000008000000 0000d59aca40f500 26: 0000000018000000 00004e06613b8c80 27: 00000000f8000000 0000587e6a932c80 28: 0000000048000000 00005044ac609c80 29: c000000778000000 0000998be2321500 30: 00000000f0000000 000008ad8a1b8c80 31: 0000000040000000 00000073cbe8fc80 32: 0000000010000000 0000fe3580c3dc80 33: c000000028000000 0000420e6f8ca500 34: c000000758000000 0000980d056eb500 36: 00000000f0000000 00007bbb0a7b3c80 37: 0000000040000000 000073814c48ac80 38: 0000000010000000 0000714301239c80 39: c000000038000000 000042cdddee5500 40: c000000768000000 000098cc73d06500 41: c000000738000000 0000968e28ab5500 43: 00000000f0000000 000095a009bbcc80 44: 0000000040000000 00008d664b893c80 45: 0000000010000000 00008b2800642c80 47: 00000000f0000000 00009ef7aa634c80 48: 0000000040000000 000096bdec30bc80 49: 0000000010000000 0000947fa10bac80 51: 00000000f0000000 00007bbb0a7b3c80 52: 0000000040000000 000073814c48ac80 53: cf00000018000000 0000d65a38a2a500 54: 0000000010000000 0000714301239c80 55: c000000748000000 0000974d970d0500 57: 00000000f0000000 
00009ef7aa634c80 58: 0000000040000000 000096bdec30bc80 59: 0000000010000000 0000947fa10bac80 61: 00000000f0000000 0000f5fe48cc7c80 62: 0000000040000000 0000edc48a99ec80 63: 0000000010000000 0000eb863f74dc80 SLB 0-1 at last exception exit: 0: c000000008000000 0000408f92c94500 1: d000000008000000 0000f09b89af5400 SLB update log: 4: 1 1fa087dccefc17 0000998be2321500 c00000077800001d 5: 2 1fa087dbeb2091 0000000018000000 0000000000000000 6: 1 1fa087dbeb20ac 000093c032a9fc80 0000000008000038 7: 1 1fa087dbeb20bd 00009ef7aa634c80 00000000f8000039 8: 1 1fa087dbeb20d1 000096bdec30bc80 000000004800003a 9: 1 1fa087dbeb37d5 0000947fa10bac80 000000001800003b 10: 2 1fa087dc26370a 0000000008000000 0000000000000000 11: 2 1fa087dc26370f 00000000f8000000 0000000000000000 12: 2 1fa087dc26372f 0000000048000000 0000000000000000 13: 2 1fa087dc263734 0000000018000000 0000000000000000 14: 1 1fa087dc26375f 0000eac6d1132c80 000000000800003c 15: 1 1fa087dc263772 0000f5fe48cc7c80 00000000f800003d 16: 1 1fa087dc263787 0000edc48a99ec80 000000004800003e 17: 1 1fa087dc263bc6 0000eb863f74dc80 000000001800003f 18: 2 1fa087dc264698 0000000008000000 0000000000000000 19: 2 1fa087dc26469e 00000000f8000000 0000000000000000 20: 2 1fa087dc2646a3 0000000048000000 0000000000000000 21: 2 1fa087dc2646a8 0000000018000000 0000000000000000 22: 1 1fa087dc2646be 0000947fa10bac80 0000000018000003 23: 1 1fa087dc2646cd 00009ef7aa634c80 00000000f8000004 24: 1 1fa087dc2646e2 000096bdec30bc80 0000000048000005 25: 1 1fa087dc264829 000093c032a9fc80 0000000008000006 26: 2 1fa087dc7695e9 0000000018000000 0000000000000000 27: 2 1fa087dc7695ee 00000000f8000000 0000000000000000 28: 2 1fa087dc7695f6 0000000048000000 0000000000000000 29: 2 1fa087dc7695fc 0000000008000000 0000000000000000 30: 1 1fa087dc769623 0000175b11a2cc80 0000000008000007 31: 1 1fa087dc769636 00002292895c1c80 00000000f8000008 32: 1 1fa087dc76964b 00001a58cb298c80 0000000048000009 33: 1 1fa087dc76a03d 0000181a80047c80 000000001800000a 34: 2 1fa087dc7840e0 
0000000008000000 0000000000000000 35: 2 1fa087dc7840e5 00000000f8000000 0000000000000000 36: 2 1fa087dc784103 0000000048000000 0000000000000000 37: 2 1fa087dc784108 0000000018000000 0000000000000000 38: 1 1fa087dc784134 00001c06e1f68c80 000000000800000b 39: 1 1fa087dc784145 0000273e59afdc80 00000000f800000c 40: 1 1fa087dc78415a 00001f049b7d4c80 000000004800000d 41: 1 1fa087dc78542a 00001cc650583c80 000000001800000e 42: 2 1fa087dc84f844 0000000008000000 0000000000000000 43: 2 1fa087dc84f849 00000000f8000000 0000000000000000 44: 2 1fa087dc84f869 0000000048000000 0000000000000000 45: 2 1fa087dc84f86e 0000000018000000 0000000000000000 46: 1 1fa087dc84f891 0000708392c1ec80 000000000800000f 47: 1 1fa087dc84f8a5 00007bbb0a7b3c80 00000000f8000010 48: 1 1fa087dc84f8c3 000073814c48ac80 0000000048000011 49: 1 1fa087dc84fb2a 0000714301239c80 0000000018000012 50: 2 1fa087dc851369 0000000008000000 0000000000000000 51: 2 1fa087dc85136f 00000000f8000000 0000000000000000 52: 2 1fa087dc851374 0000000048000000 0000000000000000 53: 2 1fa087dc851379 0000000018000000 0000000000000000 54: 1 1fa087dc8513a2 000093c032a9fc80 0000000008000013 55: 1 1fa087dc8513b5 00009ef7aa634c80 00000000f8000014 56: 1 1fa087dc8513c5 000096bdec30bc80 0000000048000015 57: 1 1fa087dc85158f 0000947fa10bac80 0000000018000016 58: 1 1fa087dc858603 0000950f4be7f500 c000000718000017 59: 1 1fa087dc85aa02 000095ceba49a500 c000000728000018 60: 1 1fa087dcb5b5ea 0000d59aca40f500 cf00000008000019 61: 2 1fa087dccefa5a 0000000008000000 0000000000000000 62: 2 1fa087dccefa5f 00000000f8000000 0000000000000000 0: 2 1fa087dccefa69 0000000018000000 0000000000000000 1: 1 1fa087dccefa8f 00004e06613b8c80 000000001800001a 2: 1 1fa087dccefaa4 0000587e6a932c80 00000000f800001b 3: 1 1fa087dccefac6 00005044ac609c80 000000004800001c Oops: Unrecoverable exception, sig: 6 [#1] SMP NR_CPUS=128 NUMA pSeries Modules linked in: NIP: c000000000008d4c LR: 00000000102e9790 CTR: 00000000102686c0 REGS: c00000077304fbb0 TRAP: 4100 Not tainted 
(2.6.25-rc8-autotest) MSR: 8000000000001030 <ME,IR,DR> CR: 28002488 XER: 20000000 TASK = c000000774bb3200[9954] 'cc1' THREAD: c00000077304c000 CPU: 1 GPR00: 0000000000004000 c00000077304fe30 00000000102e929c 000000000000d032 GPR04: 00000000000000bc 0000000000000000 0000000000000000 0000000000000000 GPR08: 0000000000000037 0000000010440000 00000000f765d1c0 00000000f765c240 GPR12: 0000000048002488 00000000105ba630 0000000010030000 0000000010030000 GPR16: 00000000105b0000 00000000105b0000 0000000010440000 00000000ff9d92d8 GPR20: 000000001043b8f4 00000000102686c0 00000000ff9d91d8 0000000000000000 GPR24: 0000000000000000 0000000010071140 0000000000000000 0000000000000000 GPR28: 00000000105b39bc 00000000f765c530 00000000f7653770 00000000f764fbe0 NIP [c000000000008d4c] restore+0xcc/0xe8 LR [00000000102e9790] 0x102e9790 Call Trace: [c00000077304fe30] [c000000000008d7c] do_work+0x14/0x2c (unreliable) Instruction dump: e88d01f0 f84d01f0 7c841050 e84d01e8 7c422214 f84d01e8 e9a100d8 7c7b03a6 e84101a0 7c4ff120 e8410170 7c5a03a6 <e8010070> e8410080 e8610088 e8810090 ---[ end trace 1d1912fbf2b044ad ]--- -- Thanks & Regards, Kamalesh Babulal, Linux Technology Center, IBM, ISTL. _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev