Marco Peereboom <slash <at> peereboom.us> writes:

> Correct.  If this isn't the case then I need to see a dmesg before &
> after rebooting and bioctl output before and after reboot.
> 
> Keep in mind that softraid can only detect failure AFTER an io fails.
> This is key, because you could fail a drive and go undetected by
> softraid.

Clear. This is why I tested with 'echo Nonsense > testo'.


Here is what I did; I hope it explains what is going on.
If not, just ask!

[rebooted]
# bioctl softraid0         
Volume  Status               Size Device  
softraid0 0 Online       299671585280 sd3     RAID1
      0 Online       299671585280 0:0.0   noencl <sd1b>
      1 Online       299671585280 0:1.0   noencl <sd2b>
# df -h
/dev/sd0a      300M    108M    177M    38%    /
/dev/sd3h      9.8G    730M    8.6G     8%    /home
/dev/sd3d     1008M    6.0K    958M     0%    /tmp
/dev/sd3f      7.9G    2.7G    4.8G    36%    /usr
/dev/sd3e      492M   17.1M    450M     4%    /var
/dev/sd3g      2.0G    1.4M    1.9G     0%    /var/mail
/dev/sd3i      7.9G    3.3M    7.5G     0%    /var/www
/dev/sd3j      246G   95.5G    138G    41%    /backup
# cd /backup
# ls -l
total 200231436
[some files listed]
# echo Nonsense > testo_b4
# ls -l testo_b4                                                               
-rw-r--r--  1 root  wheel  9 May 22 11:57 testo_b4
# bioctl softraid0         
Volume  Status               Size Device  
softraid0 0 Online       299671585280 sd3     RAID1
      0 Online       299671585280 0:0.0   noencl <sd1b>
      1 Online       299671585280 0:1.0   noencl <sd2b>
# [pull drive]
# dmesg
OpenBSD 4.5 (GENERIC.MP) #0: Thu May 14 18:57:01 SGT 2009
    r...@claude2.uwe.uniten.edu.my:/usr/src/sys/arch/amd64
/compile/GENERIC.MP
real mem = 3756994560 (3582MB)
avail mem = 3634552832 (3466MB)
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.3 @ 0xec000 (62 entries)
bios0: vendor HP version "D17" date 07/16/2007
bios0: HP ProLiant ML350 G4
acpi0 at bios0: rev 2
acpi0: tables DSDT FACP SPCR MCFG APIC
acpi0: wakeup devices
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Xeon(TM) CPU 3.00GHz, 3000.53 MHz
cpu0:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,
CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,DS-CPL,
CNXT-ID,CX16,xTPR,LONG
cpu0: 1MB 64b/line 8-way L2 cache
cpu0: apic clock running at 200MHz
cpu1 at mainbus0: apid 6 (application processor)
cpu1: Intel(R) Xeon(TM) CPU 3.00GHz, 3000.11 MHz
cpu1:
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,
CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,DS-CPL,
CNXT-ID,CX16,xTPR,LONG
cpu1: 1MB 64b/line 8-way L2 cache
ioapic0 at mainbus0 apid 8 pa 0xfec00000, version 20, 24 pins
ioapic1 at mainbus0 apid 9 pa 0xfec10000, version 20, 24 pins
ioapic1: misconfigured as apic 0, remapped to apid 9
ioapic2 at mainbus0 apid 10 pa 0xfec80000, version 20, 24 pins
ioapic3 at mainbus0 apid 11 pa 0xfec80400, version 20, 24 pins
acpiprt0 at acpi0: bus 1 (IP2P)
acpiprt1 at acpi0: bus 2 (IPXB)
acpiprt2 at acpi0: bus 6 (PCXA)
acpiprt3 at acpi0: bus 9 (PCXB)
acpiprt4 at acpi0: bus 5 (PTA0)
acpiprt5 at acpi0: bus 13 (PTB0)
acpiprt6 at acpi0: bus 16 (PTC0)
acpiprt7 at acpi0: bus 0 (PCI0)
acpicpu0 at acpi0
acpicpu1 at acpi0
acpitz0 at acpi0: critical temperature 31 degC
pci0 at mainbus0 bus 0: configuration mode 1
pchb0 at pci0 dev 0 function 0 "Intel E7520 Host" rev 0x0c
ppb0 at pci0 dev 2 function 0 "Intel E7520 PCIE" rev 0x0c
pci1 at ppb0 bus 5
ppb1 at pci1 dev 0 function 0 "Intel PCIE-PCIE" rev 0x09
pci2 at ppb1 bus 6
ppb2 at pci1 dev 0 function 2 "Intel PCIE-PCIE" rev 0x09
pci3 at ppb2 bus 9
ppb3 at pci0 dev 4 function 0 "Intel E7520 PCIE" rev 0x0c
pci4 at ppb3 bus 13
ppb4 at pci0 dev 6 function 0 "Intel E7520 PCIE" rev 0x0c
pci5 at ppb4 bus 16
ppb5 at pci0 dev 28 function 0 "Intel 6300ESB PCIX" rev 0x02
pci6 at ppb5 bus 2
mpi0 at pci6 dev 3 function 0 "Symbios Logic 53c1030" rev 0x08: 
apic 9 int 0 (irq 5)
scsibus0 at mpi0: 16 targets, initiator 7
sd0 at scsibus0 targ 0 lun 0: <COMPAQ, BF03688284, HPB3> SCSI3 0/direct fixed
sd0: 34732MB, 512 bytes/sec, 71132000 sec total
sd1 at scsibus0 targ 3 lun 0: <COMPAQ, BF3008AFEC, HPB1> SCSI3 0/direct fixed
sd1: 286102MB, 512 bytes/sec, 585937500 sec total
sd2 at scsibus0 targ 5 lun 0: <COMPAQ, BF3008AFEC, HPB1> SCSI3 0/direct fixed
sd2: 286102MB, 512 bytes/sec, 585937500 sec total
mpi0: target 0 Sync at 160MHz width 16bit offset 63 QAS 1 DT 1 IU 1
mpi0: target 3 Sync at 160MHz width 16bit offset 127 QAS 1 DT 1 IU 1
mpi0: target 5 Sync at 160MHz width 16bit offset 127 QAS 1 DT 1 IU 1
mpi1 at pci6 dev 3 function 1 "Symbios Logic 53c1030" rev 0x08: 
apic 9 int 1 (irq 5)
scsibus1 at mpi1: 16 targets, initiator 7
uhci0 at pci0 dev 29 function 0 "Intel 6300ESB USB" rev 0x02: 
apic 8 int 16 (irq 5)
uhci1 at pci0 dev 29 function 1 "Intel 6300ESB USB" rev 0x02: 
apic 8 int 19 (irq 5)
"Intel 6300ESB WDT" rev 0x02 at pci0 dev 29 function 4 not configured
"Intel 6300ESB APIC" rev 0x02 at pci0 dev 29 function 5 not configured
ehci0 at pci0 dev 29 function 7 "Intel 6300ESB USB" rev 0x02: 
apic 8 int 23 (irq 5)
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 "Intel EHCI root hub" rev 2.00/1.00 addr 1
ppb6 at pci0 dev 30 function 0 "Intel 82801BA Hub-to-PCI" rev 0x0a
pci7 at ppb6 bus 1
bge0 at pci7 dev 2 function 0 "Broadcom BCM5705K" rev 0x03, BCM5705 A3
(0x3003): apic 8 int 17 (irq 5), address 00:13:21:ae:d5:03
brgphy0 at bge0 phy 1: BCM5705 10/100/1000baseT PHY, rev. 2
vga1 at pci7 dev 3 function 0 "ATI Rage XL" rev 0x27
wsdisplay0 at vga1 mux 1: console (80x25, vt100 emulation)
wsdisplay0: screen 1-5 added (80x25, vt100 emulation)
vendor "Compaq", unknown product 0x00d7 (class system subclass 
miscellaneous, rev 0x01) at pci7 dev 4 function 0 not configured
pcib0 at pci0 dev 31 function 0 "Intel 6300ESB LPC" rev 0x02
pciide0 at pci0 dev 31 function 1 "Intel 6300ESB IDE" rev 0x02: DMA, 
channel 0 configured to compatibility, channel 1 configured to compatibility
pciide0: channel 0 disabled (no drives)
atapiscsi0 at pciide0 channel 1 drive 1
scsibus2 at atapiscsi0: 2 targets
cd0 at scsibus2 targ 0 lun 0: <HL-DT-ST, CD-ROM GCR-8482B, 2.09> 
ATAPI 5/cdrom removable
cd0(pciide0:1:1): using PIO mode 4, Ultra-DMA mode 2
usb1 at uhci0: USB revision 1.0
uhub1 at usb1 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb2 at uhci1: USB revision 1.0
uhub2 at usb2 "Intel UHCI root hub" rev 1.00/1.00 addr 1
isa0 at pcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com0: console
com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo
pckbc0 at isa0 port 0x60/5
pckbd0 at pckbc0 (kbd slot)
pckbc0: using irq 1 for kbd slot
wskbd0 at pckbd0: console keyboard, using wsdisplay0
pmsi0 at pckbc0 (aux slot)
pckbc0: using irq 12 for aux slot
wsmouse0 at pmsi0 mux 0
pcppi0 at isa0 port 0x61
midi0 at pcppi0: <PC speaker>
spkr0 at pcppi0
fdc0 at isa0 port 0x3f0/6 irq 6 drq 2
fd0 at fdc0 drive 0: 1.44MB 80 cyl, 2 head, 18 sec
mtrr: Pentium Pro MTRR support
ugen0 at uhub2 port 1 "American Power Conversion Back-UPS RS 1000 
FW:7.g8 .I USB FW:g8" rev 1.10/1.06 addr 2
softraid0 at root
scsibus3 at softraid0: 1 targets
sd3 at scsibus3 targ 0 lun 0: <OPENBSD, SR RAID 1, 003> SCSI2 0/direct fixed
sd3: 285789MB, 512 bytes/sec, 585296066 sec total
root on sd0a swap on sd0b dump on sd0b
# bioctl softraid0 
Volume  Status               Size Device  
softraid0 0 Degraded     299671585280 sd3     RAID1
      0 Offline      299671585280 0:0.0   noencl <sd1b>
      1 Online       299671585280 0:1.0   noencl <sd2b>
# disklabel sd2       
# Inside MBR partition 3: type A6 start 63 size 585922617
# /dev/rsd2c:
type: SCSI
disk: SCSI disk
label: BF3008AFEC      
flags:
bytes/sector: 512
sectors/track: 63
tracks/cylinder: 255
sectors/cylinder: 16065
cylinders: 36472
total sectors: 585937500
rpm: 15000
interleave: 1
trackskew: 0
cylinderskew: 0
headswitch: 0           # microseconds
track-to-track seek: 0  # microseconds
drivedata: 0 

16 partitions:
#                size           offset  fstype [fsize bsize  cpg]
  a:           626472               63  4.2BSD   2048 16384    1 
  b:        585296145           626535    RAID                   
  c:        585937500                0  unused                   
# disklabel sd1 
# /dev/rsd1c:
type: SCSI
disk: SCSI disk
label: BF3008AFEC      
flags:
bytes/sector: 512
sectors/track: 63
tracks/cylinder: 255
sectors/cylinder: 16065
cylinders: 36472
total sectors: 585937500
rpm: 15000
interleave: 1
trackskew: 0
cylinderskew: 0
headswitch: 0           # microseconds
track-to-track seek: 0  # microseconds
drivedata: 0 

16 partitions:
#                size           offset  fstype [fsize bsize  cpg]
  a:           626472               63  4.2BSD   2048 16384    1 
  b:        585296145           626535    RAID                   
  c:        585937500         

[Now output of serial console:]
# /etc/rc.shutdown in progress...

/etc/rc.shutdown complete.
May 22 12:07:26 apcupsd[25197]: apcupsd shutdown succeeded


# syncing disks... done

sd3 detached

scsibus3 detached



The operating system has halted.

Please press any key to reboot.

[push drive back in and press a key]

[...]

ugen0 at uhub2 port 1 "American Power Conversion Back-UPS RS 1000
FW:7.g8 .I USB FW:g8" rev 1.10/1.06 addr 2

softraid0 at root

softraid0: sd3 was not shutdown properly

scsibus3 at softraid0: 1 targets

sd3 at scsibus3 targ 0 lun 0: <OPENBSD, SR RAID 1, 003> SCSI2 0/direct fixed

sd3: 285789MB, 512 bytes/sec, 585296066 sec total

root on sd0a swap on sd0b dump on sd0b

Automatic boot in progress: starting file system checks.
/dev/rsd0a: file system is clean; not checking
/dev/rsd3h: 58 files, 373692 used, 4787881 free 
(25 frags, 598482 blocks, 0.0% fragmentation)
/dev/rsd3h: MARKING FILE SYSTEM CLEAN
/dev/rsd3d: 3 files, 3 used, 516276 free (28 frags, 64531 blocks, 
0.0% fragmentation)
/dev/rsd3d: MARKING FILE SYSTEM CLEAN
/dev/rsd3f: file system is clean; not checking
/dev/rsd3e: UNALLOCATED  I=16036  OWNER=root MODE=0
/dev/rsd3e: SIZE=0 MTIME=May 22 04:07 2009 
NAME=/run/syslog.pid

/dev/rsd3e: UNEXPECTED INCONSISTENCY; RUN fsck_ffs MANUALLY.
/dev/rsd3g: file system is clean; not checking
/dev/rsd3i: 265 files, 1700 used, 4124016 free 
(56 frags, 515495 blocks, 0.0% fragmentation)
/dev/rsd3i: MARKING FILE SYSTEM CLEAN
/dev/rsd3j: file system is clean; not checking
THE FOLLOWING FILE SYSTEM HAD AN UNEXPECTED INCONSISTENCY:

        ffs: /dev/rsd3e (/var)

Automatic file system check failed; help!
Enter pathname of shell or RETURN for sh: 
# bioctl softraid0

Volume  Status               Size Device  
softraid0 0 Online       299671585280 sd3     RAID1

      0 Online       299671585280 0:0.0   noencl <sd1b>
      1 Online       299671585280 0:1.0   noencl <sd2b>

# fsck /dev/rsd3e

** /dev/rsd3e
** Last Mounted on /var
** Phase 1 - Check Blocks and Sizes
** Phase 2 - Check Pathnames
UNALLOCATED  I=16036  OWNER=root MODE=0
SIZE=0 MTIME=May 22 04:07 2009 
NAME=/run/syslog.pid

REMOVE? [Fyn?] y

UNALLOCATED  I=16044  OWNER=root MODE=0
SIZE=0 MTIME=May 22 04:07 2009 
NAME=/run/inetd.pid

REMOVE? [Fyn?] y

UNALLOCATED  I=16045  OWNER=root MODE=0
SIZE=0 MTIME=May 22 04:07 2009 
NAME=/run/sshd.pid

REMOVE? [Fyn?] y

UNALLOCATED  I=16081  OWNER=root MODE=0
SIZE=0 MTIME=May 22 04:07 2009 
NAME=/run/cron.pid

REMOVE? [Fyn?] y

UNALLOCATED  I=32136  OWNER=root MODE=0

SIZE=0 MTIME=May 22 04:07 2009 
FILE=/spool/postfix/dev/log


REMOVE? [Fyn?] y

** Phase 3 - Check Connectivity

** Phase 4 - Check Reference Counts
UNREF FILE  I=48239  OWNER=root MODE=140600
SIZE=0 MTIME=May 22 03:53 2009 
RECONNECT? [Fyn?] y

** Phase 5 - Check Cyl groups
FREE BLK COUNT(S) WRONG IN SUPERBLK
SALVAGE? [Fyn?] y

SUMMARY INFORMATION BAD
SALVAGE? [Fyn?] y

BLK(S) MISSING IN BIT MAPS
SALVAGE? [Fyn?] y

609 files, 8747 used, 243204 free (108 frags, 30387 blocks, 0.0% fragmentation)


MARK FILE SYSTEM CLEAN? [Fyn?] y


***** FILE SYSTEM WAS MODIFIED *****

[reboot]

[...]

softraid0 at root

scsibus3 at softraid0: 1 targets

sd3 at scsibus3 targ 0 lun 0: <OPENBSD, SR RAID 1, 003> SCSI2 0/direct fixed

sd3: 285789MB, 512 bytes/sec, 585296066 sec total

root on sd0a swap on sd0b dump on sd0b

Automatic boot in progress: starting file system checks.
/dev/rsd0a: file system is clean; not checking
/dev/rsd3h: file system is clean; not checking
/dev/rsd3d: file system is clean; not checking
/dev/rsd3f: 215473 files, 1430232 used, 2699500 free 
(1148 frags, 337294 blocks, 0.0% fragmentation)
/dev/rsd3f: MARKING FILE SYSTEM CLEAN
/dev/rsd3e: file system is clean; not checking
/dev/rsd3g: 3 files, 697 used, 1033511 free 
(15 frags, 129187 blocks, 0.0% fragmentation)
/dev/rsd3g: MARKING FILE SYSTEM CLEAN
/dev/rsd3i: file system is clean; not checking
/dev/rsd3j: file system is clean; not checkingWARNING: 
R/W mount of /backup denied.  Filesystem is not clean - run fsck


mount_ffs: /dev/sd3j on /backup: filesystem must be mounted read-only;
you may need to run fsck
setting tty flags
pf enabled
machdep.allowaperture: 0 -> 2
starting network

rm: /var/spool/lock/LCK..: Bad file descriptor
starting system logger
rm: /var/named/dev/log: Bad file descriptor
starting named
starting initial daemons: ntpd.
savecore: no core dump
checking quotas: done.
building ps databases: kvm dev.
clearing /tmp
starting pre-securelevel daemons:.
setting kernel security level: kern.securelevel: 0 -> 1
turdev = 0x434, block = 1216, fs = /var

ning on accountipanic: ng

creating runffs_blkfree: freeing free fragtime link editor

 directory cacheStopped at      Debugger+0x5:   leave

RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!

IF RUNNING SMP, USE 'mach ddbcpu <#>' AND 'trace' ON OTHER PROCESSORS, TOO.

DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!

ddb{0}> trace

Debugger() at Debugger+0x5

panic() at panic+0x122

ffs_blkfree() at ffs_blkfree+0x61

ffs_realloccg() at ffs_realloccg+0x42a

ffs1_balloc() at ffs1_balloc+0x3ae

ffs_write() at ffs_write+0x1ca

VOP_WRITE() at VOP_WRITE+0x31

vn_rdwr() at vn_rdwr+0xe4

acct_process() at acct_process+0x24b

exit1() at exit1+0x121

sys_exit() at sys_exit+0x13

syscall() at syscall+0x13e

--- syscall (number 1) ---

end of kernel

end trace frame: 0x7f7ffffbf320, count: -12

acpi_pdirpa+0x411632:

ddb{0}> ps  

   PID   PPID   PGRP    UID  S       FLAGS  WAIT          COMMAND

* 1772  26803  26803      0  7   0x2006002                ldconfig

 25653      0      0      0  3   0x2100200  acct          acct

 15307  16924  11344     83  2   0x2000180                ntpd

 16924  11344  11344     83  3   0x2000180  poll          ntpd

 11344      1  11344      0  3   0x2000080  poll          ntpd

 14424   1611   1611     70  3   0x2000180  select        named

  1611      1   1611      0  3   0x2000180  netio         named

 17379   9225   9225     74  3   0x2000180  bpf           pflogd

  9225      1   9225      0  3   0x2000080  netio         pflogd

 19215  27924  27924     73  7   0x2000180                syslogd

 27924      1  27924      0  3   0x2000088  netio         syslogd

 26803      1  26803      0  3   0x2004082  pause         sh

    16      0      0      0  3   0x2100200  bored         crypto

    15      0      0      0  3   0x2100200  aiodoned      aiodoned

    14      0      0      0  3   0x2100200  syncer        update

    13      0      0      0  3   0x2100200  cleaner       cleaner

    12      0      0      0  3    0x100200  reaper        reaper

    11      0      0      0  3   0x2100200  pgdaemon      pagedaemon

    10      0      0      0  3   0x2100200  pftm          pfpurge

     9      0      0      0  3   0x2100200  usbevt        usb2

     8      0      0      0  3   0x2100200  usbevt        usb1

     7      0      0      0  3   0x2100200  usbtsk        usbtask

     6      0      0      0  3   0x2100200  usbevt        usb0

     5      0      0      0  3   0x2100200  acpi_idle     acpi0

     4      0      0      0  3    0x100200                idle1

     3      0      0      0  3   0x2100200  bored         syswq

     2      0      0      0  3    0x100200                idle0

     1      0      1      0  3   0x2004080  wait          init

     0     -1      0      0  3   0x2080200  scheduler     swapper


(I think I sent you a screenshot of this type of panic earlier.)


I hope this helps,

Uwe

Reply via email to