Lockups/panics with arbitrary 7.2 boxes

2012-08-24 Thread Sriram Gorti
Hello,

We have been observing the following on quite a few of our i386
FreeBSD 7.2 systems in the course of the last few months:
1. Boxes locking up - to clarify, manual reboot necessary to bring it
back into operation.
2. Boxes panicking somewhere in virtual memory code, e.g., vdrop, page_fault.

In both cases, we did not find anything specifically unusual in their
usage profile when compared with other similar machines. For example,
SWAPMETA was well within limits, available vm.kmem_map_free was quite
normal. One observation was that the re-start of a periodically
re-starting memory-intensive process coincided with the lockup/panic.

Suspecting that we did not have the fix for some kernel bug that was
perhaps fixed later, we looked through the known FreeBSD 7.2 issues that had
similar backtraces but did not find any. I know this is somewhat of
an old release but any history/pointers for known bugs or any other
suggestions would be most helpful.

Unfortunately, it is not quite an option to debug these boxes when
they hit the issue.

Data for config, selected sysctls and sample backtraces follow.

[1] Key-portions of config

machine  i386
cpu I686_CPU
maxusers 256
makeoptions DEBUG=-g

options SCHED_4BSD
options PREEMPTION  # Enable kernel thread preemption
options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!]
options INET #InterNETworking
options INET6   #IPv6
options FFS #Berkeley Fast Filesystem
options SOFTUPDATES#Enable FFS soft updates support
options MD_ROOT#MD is a potential root device
options PROCFS  #Process filesystem
options PSEUDOFS
options SCSI_DELAY=2000  #Delay (in ms) before probing SCSI
options UFS_DIRHASH
options VFS_AIO
options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions

options SYSVSHM
options SYSVSEM
options SYSVMSG
options SHMMAXPGS=65536
options SEMMNI=40
options SEMMNS=240
options SEMUME=40
options SEMMNU=120

options SMP # Symmetric MultiProcessor Kernel
device  apic# Symmetric (APIC) I/O

device  pci
device  ata
device  atadisk # ATA disk drives
device  atapicd # ATAPI CDROM drives
device  ataraid # ATA RAID drives
options ATA_STATIC_ID   #Static device numbering

device  ahc# AHA2940 and onboard AIC7xxx devices
device  scbus# SCSI bus (required)
device  da # Direct Access (disks)
device  pass #CAM passthrough driver
device  aac   # Adaptec FSA RAID, Dell PERC2/PERC3
device  amr   # AMI MegaRAID

device  atkbdc
device  atkbd
device  vga
device  sc
device  sio

device  miibus  # MII bus support
device  fxp # Intel EtherExpress PRO/100B (82557, 82558)
device  bge # Broadcom BCM570x (``Tigon III'')
device  bce
device  em
device  le

device  loop# Network loopback
device  ether   # Ethernet support
device  vlan# Virtual LAN support
device  pty # Pseudo-ttys (telnet etc)
device  md  # Memory disks
device  bpf #Berkeley packet filter

options KDB
options ALT_BREAK_TO_DEBUGGER   # ~CR^B
options KDB_UNATTENDED  # reboot on default
options DDB # reboot on default
options KDB_TRACE   # traceback
options GDB

#HW Crypto
device  crypto# core crypto support
device  cryptodev   # /dev/crypto for access to h/w
device  ubsec# Broadcom 5501, 5601, 58xx
device  hifn# Hifn 7951, 7781, etc.
device  safe   # SafeNet 1141

device  smb   # /dev/smb*
device  smbus   # bus
device  ichsmb  # Intel ICH SMBus controller chips

device  acpi
device  random

options COMPAT_FREEBSD4
options COMPAT_FREEBSD6
options COMPAT_LINUX
options LINPROCFS
options LINSYSFS
device  ichwd
device  mpt

device  ed

# add in IPv6 support
options INET6   # IPv6 communications protocols
device  gif # IPv6 and IPv4 tunneling
device  faith  # IPv6-to-IPv4 relaying (translation)

#buffer kernel logging so messages don't get split
options PRINTF_BUFR_SIZE=256

device  sg  # Linux SCSI passthrough
device  mfip# LSI MegaRAID SAS passthrough, requires CAM
#Bump up kernel message buffer size to capture tty logs.
options MSGBUF_SIZE=98304

device ichgpio

# 2 fibs.
options ROUTETABLES=2

[2] Sample back-traces

page fault
db_trace_self_wrapper+38
kdb_backtrace+41
panic+615
trap_fatal+819
trap_pfault+592
trap+946
calltrap+6
vm_page_cache_remove+14
vm_page_alloc+789
vm_fault+2215
trap_pfault+315
trap+560
calltrap+6
sched_switch+406
mi_switch+326

Re: Lockups/panics with arbitrary 7.2 boxes

2012-08-24 Thread Bryan Drewery
On 8/24/2012 6:47 AM, Sriram Gorti wrote:
> We have been observing the following on quite a few of our i386
> FreeBSD 7.2 systems in the course of the last few months:

7.2 reached EoL June 30th, 2010. See http://www.freebsd.org/security/

You should really upgrade.

Bryan
___
freebsd-questions@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-questions
To unsubscribe, send any mail to freebsd-questions-unsubscr...@freebsd.org