Module Name: src Committed By: msaitoh Date: Tue Feb 13 14:56:52 UTC 2024
Modified Files: src/sys/dev/ic: mfireg.h src/sys/dev/pci: mfii.c Log Message: mfii(4): Apply two changes from OpenBSD to fix an unknown firmware state. My own "MegaRAID 946N-8i 2G", firmware 50.5.0-2594 failed to attach. mfii0: unknown firmware state 1879048192 1879048192 is equal to 0x70000000 (== MFI_STATE_FW_INIT_2). Apply the following two OpenBSD commits to resolve this problem. ---------------------------- sys/dev/pci/mfii.c OpenBSD rev. 1.86 sys/dev/ic/mfireg.h OpenBSD rev. 1.52 Make mfii(4) recover from firmware FAULT state on startup. In case firmware initially comes up in FAULT state, reset the device and give it one more chance to attach successfully. The Linux megaraid_sas driver applies the same workaround in this case. There seems to be a bug in some firmware versions which can trigger this behaviour; see mainline Linux commit 6431f5d7c6025f8b007af06ea090de308f7e6881 Problem observed by me with mfii(4) attached via KVM PCI-passthrough: mfii0 at pci0 dev 2 function 0 "Symbios Logic MegaRAID SAS2208" rev 0x05: msi mfii0: firmware fault With this workaround in place, attachment succeeds and the device works: mfii0 at pci0 dev 2 function 0 "Symbios Logic MegaRAID SAS2208" rev 0x05: msi mfii0: firmware fault; attempting full device reset, this can take some time mfii0: "RAID Ctrl SAS 6G 1GB (D3116C)", firmware 23.29.0-0019, 1024MB cache Tested for regressions on bare metal by Hrvoje with two different adapters: mfii0 at pci1 dev 0 function 0 "Symbios Logic MegaRAID SAS3508" rev 0x01: msi mfii0: "PERC H740P Mini ", firmware 51.16.0-4076, 8192MB cache mfii0 at pci4 dev 0 function 0 "Symbios Logic MegaRAID SAS2208" rev 0x05: msi mfii0: "ServeRAID M5110", firmware 23.34.0-0023, 512MB cache ok jmatthew@ ---------------------------- sys/dev/pci/mfii.c OpenBSD rev. 1.87 Give mfii(4) firmware more time to transition out of UNDEFINED state. 
Prevents occasional failure to recover from firmware FAULT state where the driver gave up too early: mfii0: firmware stuck in state 0 ok deraadt@ To generate a diff of this commit: cvs rdiff -u -r1.24 -r1.25 src/sys/dev/ic/mfireg.h cvs rdiff -u -r1.31 -r1.32 src/sys/dev/pci/mfii.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/ic/mfireg.h diff -u src/sys/dev/ic/mfireg.h:1.24 src/sys/dev/ic/mfireg.h:1.25 --- src/sys/dev/ic/mfireg.h:1.24 Sat Jul 16 06:52:40 2022 +++ src/sys/dev/ic/mfireg.h Tue Feb 13 14:56:52 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: mfireg.h,v 1.24 2022/07/16 06:52:40 msaitoh Exp $ */ +/* $NetBSD: mfireg.h,v 1.25 2024/02/13 14:56:52 msaitoh Exp $ */ /* $OpenBSD: mfireg.h,v 1.24 2006/06/19 19:05:45 marco Exp $ */ /* * Copyright (c) 2006 Marco Peereboom <ma...@peereboom.us> @@ -110,6 +110,7 @@ #define MFI_STATE_WAIT_HANDSHAKE 0x60000000 #define MFI_STATE_FW_INIT_2 0x70000000 #define MFI_STATE_DEVICE_SCAN 0x80000000 +#define MFI_STATE_BOOT_MESSAGE_PENDING 0x90000000 #define MFI_STATE_FLUSH_CACHE 0xa0000000 #define MFI_STATE_READY 0xb0000000 #define MFI_STATE_OPERATIONAL 0xc0000000 @@ -135,6 +136,7 @@ #define MFI_INIT_READY 0x00000002 #define MFI_INIT_MFIMODE 0x00000004 #define MFI_INIT_CLEAR_HANDSHAKE 0x00000008 +#define MFI_INIT_HOTPLUG 0x00000010 #define MFI_RESET_FLAGS MFI_INIT_READY | MFI_INIT_MFIMODE | \ MFI_INIT_ABORT #define MFI_INIT_HOTPLUG 0x00000010 Index: src/sys/dev/pci/mfii.c diff -u src/sys/dev/pci/mfii.c:1.31 src/sys/dev/pci/mfii.c:1.32 --- src/sys/dev/pci/mfii.c:1.31 Thu Oct 5 21:41:00 2023 +++ src/sys/dev/pci/mfii.c Tue Feb 13 14:56:52 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: mfii.c,v 1.31 2023/10/05 21:41:00 christos Exp $ */ +/* $NetBSD: mfii.c,v 1.32 2024/02/13 14:56:52 msaitoh Exp $ */ /* $OpenBSD: mfii.c,v 1.58 2018/08/14 05:22:21 jmatthew Exp $ */ /* @@ -19,7 +19,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: mfii.c,v 1.31 2023/10/05 21:41:00 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: mfii.c,v 1.32 2024/02/13 14:56:52 msaitoh Exp $"); #include "bio.h" @@ -440,6 +440,7 @@ static void mfii_put_ccb(struct mfii_so static int mfii_init_ccb(struct mfii_softc *); static void mfii_scrub_ccb(struct mfii_ccb *); +static int mfii_reset_hard(struct mfii_softc *); static int mfii_transition_firmware(struct mfii_softc *); static 
int mfii_initialise_firmware(struct mfii_softc *); static int mfii_get_info(struct mfii_softc *); @@ -1489,11 +1490,58 @@ mfii_aen_unregister(struct mfii_softc *s /* XXX */ } +int +mfii_reset_hard(struct mfii_softc *sc) +{ + uint16_t i; + + mfii_write(sc, MFI_OSTS, 0); + + /* enable diagnostic register */ + mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_FLUSH); + mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_1); + mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_2); + mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_3); + mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_4); + mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_5); + mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_6); + + delay(100); + + if ((mfii_read(sc, MPII_HOSTDIAG) & MPII_HOSTDIAG_DWRE) == 0) { + aprint_error_dev(sc->sc_dev, + "failed to enable diagnostic read/write\n"); + return(1); + } + + /* reset ioc */ + mfii_write(sc, MPII_HOSTDIAG, MPII_HOSTDIAG_RESET_ADAPTER); + + /* 240 milliseconds */ + delay(240000); + + for (i = 0; i < 30000; i++) { + if ((mfii_read(sc, MPII_HOSTDIAG) & + MPII_HOSTDIAG_RESET_ADAPTER) == 0) + break; + delay(10000); + } + if (i >= 30000) { + aprint_error_dev(sc->sc_dev, "failed to reset device\n"); + return (1); + } + + /* disable diagnostic register */ + mfii_write(sc, MPII_WRITESEQ, 0xff); + + return(0); +} + static int mfii_transition_firmware(struct mfii_softc *sc) { int32_t fw_state, cur_state; - int max_wait, i; + int max_wait, i, reset_on_fault = 1; fw_state = mfii_fw_state(sc) & MFI_STATE_MASK; @@ -1501,8 +1549,19 @@ mfii_transition_firmware(struct mfii_sof cur_state = fw_state; switch (fw_state) { case MFI_STATE_FAULT: - printf("%s: firmware fault\n", DEVNAME(sc)); - return (1); + if (!reset_on_fault) { + aprint_error_dev(sc->sc_dev, + "firmware fault\n"); + return (1); + } + aprint_verbose_dev(sc->sc_dev, + "firmware fault; attempting full device reset, " + "this can take some time\n"); + if (mfii_reset_hard(sc)) + return (1); + max_wait = 20; + reset_on_fault = 0; + break; case 
MFI_STATE_WAIT_HANDSHAKE: mfii_write(sc, MFI_SKINNY_IDB, MFI_INIT_CLEAR_HANDSHAKE); @@ -1512,17 +1571,22 @@ mfii_transition_firmware(struct mfii_sof mfii_write(sc, MFI_SKINNY_IDB, MFI_INIT_READY); max_wait = 10; break; - case MFI_STATE_UNDEFINED: case MFI_STATE_BB_INIT: - max_wait = 2; + max_wait = 20; break; + case MFI_STATE_UNDEFINED: case MFI_STATE_FW_INIT: + case MFI_STATE_FW_INIT_2: case MFI_STATE_DEVICE_SCAN: case MFI_STATE_FLUSH_CACHE: - max_wait = 20; + max_wait = 40; + break; + case MFI_STATE_BOOT_MESSAGE_PENDING: + mfii_write(sc, MFI_SKINNY_IDB, MFI_INIT_HOTPLUG); + max_wait = 10; break; default: - printf("%s: unknown firmware state %d\n", + printf("%s: unknown firmware state %#x\n", DEVNAME(sc), fw_state); return (1); } @@ -1537,6 +1601,10 @@ mfii_transition_firmware(struct mfii_sof printf("%s: firmware stuck in state %#x\n", DEVNAME(sc), fw_state); return (1); + } else { + DPRINTF("%s: firmware state change %#x -> %#x after " + "%d iterations\n", + DEVNAME(sc), cur_state, fw_state, i); } }