Re: panic in FFS and other related I/O problems
Shouldn't this be put into an option so it gets documented? -Guido On Mon, Apr 02, 2001 at 09:12:53AM -0700, Peter Wemm wrote: ... diff -u -r1.4 ldscript.i386 --- conf/ldscript.i386 2000/01/11 15:35:16 1.4 +++ conf/ldscript.i386 2001/04/02 16:07:18 @@ -6,7 +6,7 @@ SECTIONS { /* Read-only sections, merged into text segment: */ - . = 0xc010 + SIZEOF_HEADERS; + . = 0x8010 + SIZEOF_HEADERS; .interp : { *(.interp) } .hash : { *(.hash) } .dynsym: { *(.dynsym) } Index: i386/include/pmap.h === RCS file: /home/ncvs/src/sys/i386/include/pmap.h,v retrieving revision 1.70 diff -u -r1.70 pmap.h --- i386/include/pmap.h 2000/11/30 01:53:02 1.70 +++ i386/include/pmap.h 2001/04/02 16:07:18 @@ -92,9 +92,9 @@ #endif #ifndef NKPDE #ifdef SMP -#define NKPDE 254 /* addressable number of page tables/pde's */ +#define NKPDE 510 /* addressable number of page tables/pde's */ #else -#define NKPDE 255 /* addressable number of page tables/pde's */ +#define NKPDE 511 /* addressable number of page tables/pde's */ #endif /* SMP */ #endif To Unsubscribe: send mail to [EMAIL PROTECTED] with "unsubscribe freebsd-hackers" in the body of the message
panic in FFS and other related I/O problems
Hi Everyone, This is the famous bbs with high loads. We upgraded to 4.3-RC recently. The user level we have at this point is not that much compared to before. 2000 users is what we can usually take on without problems. Now we panic() 20 times a day. FreeBSD zoo.ee.ntu.edu.tw 4.3-RC FreeBSD 4.3-RC #0: Tue Apr 3 07:14:31 CST 2001 I am thinking that this is either a bug is FFS I/O operations or our own bbs having race conditions. However, the bbs has run fine without problem before. --- 2001/04/02 22:58 options VM_KMEM_SIZE_MAX="(300*1024*1024)"(200-300) --- 2001/04/02 22:43 run-time panic uptime:10:43¤U¤È up 1:40, 6 users, load averages: 3.69, 2.99, 2.61 bbs:Number of Users:2160 panic: pipeinit: cannot allocate pipe -- out of kvm -- code = 3 mp_lock = 0101; cpuid = 1; lapic.id = Debugger("panic") Stopped at Debugger+0x35: movb$0,in_Debugger.597 db tr Debugger(c0249bf2) at Debugger+0x35 panic(c024b3a0,3,facac440,fe726e7c,c015eaa2) at panic+0xa4 pipespace(facac440) at pipespace+0x58 pipe_write(cacf2540,fe726ed4,ca506300,0,fe55e040) at pipe_write+0x192 dofilewrite(fe55e040,cacf2540,8,80f7000,19fe) at dofilewrite+0xbe write(fe55e040,fe726f80,2823f0c8,28251008,80f7000) at write+0x3b syscall2(2f,2f,2f,80f7000,28251008) at syscall2+0x221 Xint0x80_syscall() at Xint0x80_syscall+0x2b --- 2001/04/02 20:52 PMAP_SHPGPERPROC removed from kernel /bin/mv /etc/vntab.orig /etc/vntab, Add more swap --- 2001/04/02 20:30 run-time panic Fatal trap 12: page fault while in kernel mode mp_lock = 0002; cpuid = 0; lapic.id = 0100 fault virtual address = 0x0 fault code = supervisor write, page not present instruction pointer = 0x8:0xc022fb2b stack pointer = 0x10:0xfd9d1bcc frame pointer = 0x10:0xfd9d1bfc code segment= base 0x0, limit 0xf, type 0x1b = DPL 0, pres 1, def32 1, gran 1 processor eflags= interrupt enabled, resume, IOPL = 0 current process = 4118 (bbsd) interrupt mask = bio - SMP: XXX kernel: type 12 trap, code=0 Stopped at generic_bzero+0xf: repe stosl %es:(%edi) db tr 
generic_bzero(1,c755fe00,c7243400,fd9d1c44,100) at generic_bzero+0xf ffs_vget(c755fe00,5e3f84,fd9d1cb8,0,fbe61 f40) at ffs_vget+0xa4 ufs_lookup(fd9d1d10,fd9d1d24,c01774ca,fd9d1d10,fbe61f40) at ufs_lookup+0x9c7 ufs_vnoperate(fd9d1d10,fbe61f40,ff27941d,fd9d1ef0,fbe62000) at ufs_vnoperate+0x1 5 vfs_cache_lookup(fd9d1d68,fd9d1d78,c017a338,fd9d1d68,ef8c1d00) at vfs_cache_look up+0x28a ufs_vnoperate(fd9d1d68,ef8c1d00,fd9d1ef0,fd9d1ec8,fd9b7ac0) at ufs_vnoperate+0x1 5 lookup(fd9d1ec8,0,fd9d1ec8,fd9d1f80,fd9b7ac0) at lookup+0x290 namei(fd9d1ec8,0,ca3ed580,fd9d1f80,fd9d1df8) at namei+0x147 vn_open(fd9d1ec8,1,1a4,3,fd9b7ac0) at vn_open+0x1cd open(fd9b7ac0,fd9d1f80,281960c8,281a7f00,4) at open+0xb8 syscall2(2f,2f,bfbf002f,4,281a7f00) at syscall2+0x221 Xint0x80_syscall() at Xint0x80_syscall+0x2b db call boot(0) boot() called on cpu#0 Waiting (max 60 seconds) for system process `bufdaemon' to stop... Fatal trap 12: page fault while in kernel mode mp_lock = 0002; cpuid = 0; lapic.id = 0100 fault virtual address = 0x0 fault code = supervisor write, page not present instruction pointer = 0x8:0xc02082b3 stack pointer = 0x10:0xfe80adec frame pointer = 0x10:0xfe80adf8 code segment= base 0x0, limit 0xf, type 0x1b = DPL 0, pres 1, def32 1, gran 1 processor eflags= interrupt enabled, resume, IOPL = 0 current process = 7749 (bbsd) interrupt mask = net tty bio cam - SMP: XXX kernel: type 12 trap, code=0 Stopped at generic_bzero+0xf: repe stosl %es:(%edi) --- 2000/04/02 08:29 boot panic = lowered PMAP_SHPGPERPROC to 300 panic: swap_pager_swap_init: swap_zone == NULL mp_lock = 0001; cpuid = 0; lapic.id = 0100 Debugger("panic") Stopped at Debugger+0x35: movb$0,in_Debugger.597 --- 2000/04/02 08:24 change kernel again options MAXFILES=3 options NMBCLUSTERS=16000(making this smaller) options PMAP_SHPGPERPROC=720(back to our setup that was stable before) --- 2001/04/02 08:08AM run-time panic _SimFarm_[root]:/bighead#vim dump1.pl panic: ffs_valloc: dup alloc mp_lock = 0101; cpuid = 1; lapic.id = 
Debugger("panic")
Re: panic in FFS and other related I/O problems
"Michael C . Wu" wrote: Hi Everyone, This is the famous bbs with high loads. We upgraded to 4.3-RC recently. The user level we have at this point is not that much compared to before. 2000 users is what we can usually take on without problems. Now we panic() 20 times a day. FreeBSD zoo.ee.ntu.edu.tw 4.3-RC FreeBSD 4.3-RC #0: Tue Apr 3 07:14:31 CST 2001 I am thinking that this is either a bug is FFS I/O operations or our own bbs having race conditions. However, the bbs has run fine without problem before. --- 2001/04/02 22:58 options VM_KMEM_SIZE_MAX="(300*1024*1024)"(200-300) --- 2001/04/02 22:43 run-time panic uptime:10:43¤U¤È up 1:40, 6 users, load averages: 3.69, 2.99, 2.61 bbs:Number of Users:2160 panic: pipeinit: cannot allocate pipe -- out of kvm -- code = 3 You have used up you 1G of KVM. I suggest you reconfigure the KVM on your boxes so that you double it. This will change you from 1G kernel, 3G user to 2G each. Suggested patch: Index: conf/ldscript.i386 === RCS file: /home/ncvs/src/sys/conf/ldscript.i386,v retrieving revision 1.4 diff -u -r1.4 ldscript.i386 --- conf/ldscript.i386 2000/01/11 15:35:16 1.4 +++ conf/ldscript.i386 2001/04/02 16:07:18 @@ -6,7 +6,7 @@ SECTIONS { /* Read-only sections, merged into text segment: */ - . = 0xc010 + SIZEOF_HEADERS; + . 
= 0x8010 + SIZEOF_HEADERS; .interp : { *(.interp) } .hash : { *(.hash) } .dynsym: { *(.dynsym)} Index: i386/include/pmap.h === RCS file: /home/ncvs/src/sys/i386/include/pmap.h,v retrieving revision 1.70 diff -u -r1.70 pmap.h --- i386/include/pmap.h 2000/11/30 01:53:02 1.70 +++ i386/include/pmap.h 2001/04/02 16:07:18 @@ -92,9 +92,9 @@ #endif #ifndef NKPDE #ifdef SMP -#define NKPDE 254 /* addressable number of page tables/pde's */ +#define NKPDE 510 /* addressable number of page tables/pde's */ #else -#define NKPDE 255 /* addressable number of page tables/pde's */ +#define NKPDE 511 /* addressable number of page tables/pde's */ #endif /* SMP */ #endif You have basically raised so many limits that you have run out of space for kernel allocations. root@overcee[9:09am]/home/src/sys/compile/OVERCEE-103# gdb -k kernel.debug /dev/mem ... (kgdb) print /x kernel_map.size $8 = 0x1a658000 With a default start addr of 0xc010, there is nearly 0x400 available. My machine here is using just less than half of it. Yours is probably almost full. 
mp_lock = 0101; cpuid = 1; lapic.id = Debugger("panic") Stopped at Debugger+0x35: movb$0,in_Debugger.597 db tr Debugger(c0249bf2) at Debugger+0x35 panic(c024b3a0,3,facac440,fe726e7c,c015eaa2) at panic+0xa4 pipespace(facac440) at pipespace+0x58 pipe_write(cacf2540,fe726ed4,ca506300,0,fe55e040) at pipe_write+0x192 dofilewrite(fe55e040,cacf2540,8,80f7000,19fe) at dofilewrite+0xbe write(fe55e040,fe726f80,2823f0c8,28251008,80f7000) at write+0x3b syscall2(2f,2f,2f,80f7000,28251008) at syscall2+0x221 Xint0x80_syscall() at Xint0x80_syscall+0x2b --- 2001/04/02 20:52 PMAP_SHPGPERPROC removed from kernel /bin/mv /etc/vntab.orig /etc/vntab, Add more swap --- 2001/04/02 20:30 run-time panic Fatal trap 12: page fault while in kernel mode mp_lock = 0002; cpuid = 0; lapic.id = 0100 fault virtual address = 0x0 fault code = supervisor write, page not present instruction pointer = 0x8:0xc022fb2b stack pointer = 0x10:0xfd9d1bcc frame pointer = 0x10:0xfd9d1bfc code segment= base 0x0, limit 0xf, type 0x1b = DPL 0, pres 1, def32 1, gran 1 processor eflags= interrupt enabled, resume, IOPL = 0 current process = 4118 (bbsd) interrupt mask = bio - SMP: XXX kernel: type 12 trap, code=0 Stopped at generic_bzero+0xf: repe stosl %es:(%edi) db tr generic_bzero(1,c755fe00,c7243400,fd9d1c44,100) at generic_bzero+0xf ffs_vget(c755fe00,5e3f84,fd9d1cb8,0,fbe61 f40) at ffs_vget+0xa4 ufs_lookup(fd9d1d10,fd9d1d24,c01774ca,fd9d1d10,fbe61f40) at ufs_lookup+0x9c7 ufs_vnoperate(fd9d1d10,fbe61f40,ff27941d,fd9d1ef0,fbe62000) at ufs_vnoperate+ 0x1 5 vfs_cache_lookup(fd9d1d68,fd9d1d78,c017a338,fd9d1d68,ef8c1d00) at vfs_cache_l ook up+0x28a ufs_vnoperate(fd9d1d68,ef8c1d00,fd9d1ef0,fd9d1ec8,fd9b7ac0) at ufs_vnoperate+ 0x1 5
Re: panic in FFS and other related I/O problems
Hi everyone, After I applied this patch, something happened to my system (zoo.ee.ntu.edu.tw). It seems that all programs using pthreads dump core with signal 6 and show messages like this: Apr 3 00:32:32 zoo /kernel: pid 341 (logind), uid 0: exited on signal 6 (core dumped) Fatal error 'Cannot allocate red zone for initial thread' at line ? in file /usr/src/lib/libc_r/uthread/uthread_init.c (errno = ?) Abort trap - core dumped I have already tried recompiling /usr/src/lib, but it was no use. Please advise. :) Thanks all, - Original Message - From: "Peter Wemm" [EMAIL PROTECTED] To: "Michael C . Wu" [EMAIL PROTECTED] Cc: [EMAIL PROTECTED]; [EMAIL PROTECTED] Sent: Tuesday, April 03, 2001 12:12 AM Subject: Re: panic in FFS and other related I/O problems : "Michael C . Wu" wrote: : Hi Everyone, : : This is the famous bbs with high loads. We upgraded to 4.3-RC : recently. : : The user level we have at this point is not that much compared : to before. 2000 users is what we can usually take on without : problems. Now we panic() 20 times a day. : : FreeBSD zoo.ee.ntu.edu.tw 4.3-RC FreeBSD 4.3-RC #0: Tue Apr : 3 07:14:31 CST 2001 : : I am thinking that this is either a bug is FFS I/O operations : or our own bbs having race conditions. However, the bbs : has run fine without problem before. : : --- : 2001/04/02 22:58 options VM_KMEM_SIZE_MAX="(300*1024*1024)"(200-300) : --- : 2001/04/02 22:43 run-time panic : uptime:10:43下午 up 1:40, 6 users, load averages: 3.69, 2.99, 2.61 : bbs:Number of Users:2160 : panic: pipeinit: cannot allocate pipe -- out of kvm -- code = 3 : : You have used up you 1G of KVM. I suggest you reconfigure the KVM on your : boxes so that you double it. This will change you from 1G kernel, 3G user : to 2G each. 
: : Suggested patch: : Index: conf/ldscript.i386 : === : RCS file: /home/ncvs/src/sys/conf/ldscript.i386,v : retrieving revision 1.4 : diff -u -r1.4 ldscript.i386 : --- conf/ldscript.i386 2000/01/11 15:35:16 1.4 : +++ conf/ldscript.i386 2001/04/02 16:07:18 : @@ -6,7 +6,7 @@ : SECTIONS : { :/* Read-only sections, merged into text segment: */ : - . = 0xc010 + SIZEOF_HEADERS; : + . = 0x8010 + SIZEOF_HEADERS; :.interp : { *(.interp) } :.hash : { *(.hash) } :.dynsym: { *(.dynsym) } : Index: i386/include/pmap.h : === : RCS file: /home/ncvs/src/sys/i386/include/pmap.h,v : retrieving revision 1.70 : diff -u -r1.70 pmap.h : --- i386/include/pmap.h 2000/11/30 01:53:02 1.70 : +++ i386/include/pmap.h 2001/04/02 16:07:18 : @@ -92,9 +92,9 @@ : #endif : #ifndef NKPDE : #ifdef SMP : -#define NKPDE 254 /* addressable number of page tables/pde's */ : +#define NKPDE 510 /* addressable number of page tables/pde's */ : #else : -#define NKPDE 255 /* addressable number of page tables/pde's */ : +#define NKPDE 511 /* addressable number of page tables/pde's */ : #endif /* SMP */ : #endif : : You have basically raised so many limits that you have run out of space : for kernel allocations. : root@overcee[9:09am]/home/src/sys/compile/OVERCEE-103# gdb -k kernel.debug /dev/mem : ... : (kgdb) print /x kernel_map.size : $8 = 0x1a658000 : : With a default start addr of 0xc010, there is nearly 0x400 : available. My machine here is using just less than half of it. Yours is : probably almost full. 
: : : mp_lock = 0101; cpuid = 1; lapic.id = : Debugger("panic") : Stopped at Debugger+0x35: movb$0,in_Debugger.597 : db tr : Debugger(c0249bf2) at Debugger+0x35 : panic(c024b3a0,3,facac440,fe726e7c,c015eaa2) at panic+0xa4 : pipespace(facac440) at pipespace+0x58 : pipe_write(cacf2540,fe726ed4,ca506300,0,fe55e040) at pipe_write+0x192 : dofilewrite(fe55e040,cacf2540,8,80f7000,19fe) at dofilewrite+0xbe : write(fe55e040,fe726f80,2823f0c8,28251008,80f7000) at write+0x3b : syscall2(2f,2f,2f,80f7000,28251008) at syscall2+0x221 : Xint0x80_syscall() at Xint0x80_syscall+0x2b : --- : 2001/04/02 20:52 PMAP_SHPGPERPROC removed from kernel : /bin/mv /etc/vntab.orig /etc/vntab, Add more swap : --- : 2001/04/02 20:30 run-time panic : Fatal trap 12: page fault while in kernel mode : mp_lock = 0002; cpuid = 0; lapic.id = 0100 : fault virtual address = 0x0 : fault code = supervisor write, page not present : instruction pointer = 0x8:0xc022fb2b : stack pointer = 0x10:0xfd9d1bcc : frame pointer = 0x10:0xfd9d1bfc : code segment= base 0x0, limit 0xf