Module Name:    src
Committed By:   msaitoh
Date:           Tue Feb 13 14:56:52 UTC 2024

Modified Files:
        src/sys/dev/ic: mfireg.h
        src/sys/dev/pci: mfii.c

Log Message:
mfii(4): Apply two changes from OpenBSD to fix an unknown firmware state.

 My own "MegaRAID 946N-8i 2G", firmware 50.5.0-2594, failed to attach.

        mfii0: unknown firmware state 1879048192

1879048192 equals 0x70000000 (== MFI_STATE_FW_INIT_2).
Apply the following two OpenBSD commits to resolve this problem.

----------------------------
sys/dev/pci/mfii.c OpenBSD rev. 1.86
sys/dev/ic/mfireg.h OpenBSD rev. 1.52

Make mfii(4) recover from firmware FAULT state on startup.

In case firmware initially comes up in FAULT state, reset the device and
give it one more chance to attach successfully. The Linux megaraid_sas
driver applies the same workaround in this case. There seems to be a bug
in some firmware versions which can trigger this behaviour; see mainline
Linux commit 6431f5d7c6025f8b007af06ea090de308f7e6881

Problem observed by me with mfii(4) attached via KVM PCI-passthrough:
mfii0 at pci0 dev 2 function 0 "Symbios Logic MegaRAID SAS2208" rev 0x05: msi
mfii0: firmware fault

With this workaround in place, attachment succeeds and the device works:
mfii0 at pci0 dev 2 function 0 "Symbios Logic MegaRAID SAS2208" rev 0x05: msi
mfii0: firmware fault; attempting full device reset, this can take some time
mfii0: "RAID Ctrl SAS 6G 1GB (D3116C)", firmware 23.29.0-0019, 1024MB cache

Tested for regressions on bare metal by Hrvoje with two different adapters:
mfii0 at pci1 dev 0 function 0 "Symbios Logic MegaRAID SAS3508" rev 0x01: msi
mfii0: "PERC H740P Mini ", firmware 51.16.0-4076, 8192MB cache
mfii0 at pci4 dev 0 function 0 "Symbios Logic MegaRAID SAS2208" rev 0x05: msi
mfii0: "ServeRAID M5110", firmware 23.34.0-0023, 512MB cache

ok jmatthew@

----------------------------
sys/dev/pci/mfii.c OpenBSD rev. 1.87

Give mfii(4) firmware more time to transition out of UNDEFINED state.

Prevents occasional failure to recover from firmware FAULT state where
the driver gave up too early: mfii0: firmware stuck in state 0

ok deraadt@


To generate a diff of this commit:
cvs rdiff -u -r1.24 -r1.25 src/sys/dev/ic/mfireg.h
cvs rdiff -u -r1.31 -r1.32 src/sys/dev/pci/mfii.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/mfireg.h
diff -u src/sys/dev/ic/mfireg.h:1.24 src/sys/dev/ic/mfireg.h:1.25
--- src/sys/dev/ic/mfireg.h:1.24	Sat Jul 16 06:52:40 2022
+++ src/sys/dev/ic/mfireg.h	Tue Feb 13 14:56:52 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: mfireg.h,v 1.24 2022/07/16 06:52:40 msaitoh Exp $ */
+/* $NetBSD: mfireg.h,v 1.25 2024/02/13 14:56:52 msaitoh Exp $ */
 /* $OpenBSD: mfireg.h,v 1.24 2006/06/19 19:05:45 marco Exp $ */
 /*
  * Copyright (c) 2006 Marco Peereboom <ma...@peereboom.us>
@@ -110,6 +110,7 @@
 #define MFI_STATE_WAIT_HANDSHAKE	0x60000000
 #define MFI_STATE_FW_INIT_2		0x70000000
 #define MFI_STATE_DEVICE_SCAN		0x80000000
+#define MFI_STATE_BOOT_MESSAGE_PENDING	0x90000000
 #define MFI_STATE_FLUSH_CACHE		0xa0000000
 #define MFI_STATE_READY			0xb0000000
 #define MFI_STATE_OPERATIONAL		0xc0000000
@@ -135,6 +136,7 @@
 #define MFI_INIT_READY			0x00000002
 #define MFI_INIT_MFIMODE		0x00000004
 #define MFI_INIT_CLEAR_HANDSHAKE	0x00000008
+#define MFI_INIT_HOTPLUG		0x00000010
 #define MFI_RESET_FLAGS			MFI_INIT_READY | MFI_INIT_MFIMODE | \
 					MFI_INIT_ABORT
 #define MFI_INIT_HOTPLUG		0x00000010

Index: src/sys/dev/pci/mfii.c
diff -u src/sys/dev/pci/mfii.c:1.31 src/sys/dev/pci/mfii.c:1.32
--- src/sys/dev/pci/mfii.c:1.31	Thu Oct  5 21:41:00 2023
+++ src/sys/dev/pci/mfii.c	Tue Feb 13 14:56:52 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: mfii.c,v 1.31 2023/10/05 21:41:00 christos Exp $ */
+/* $NetBSD: mfii.c,v 1.32 2024/02/13 14:56:52 msaitoh Exp $ */
 /* $OpenBSD: mfii.c,v 1.58 2018/08/14 05:22:21 jmatthew Exp $ */
 
 /*
@@ -19,7 +19,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mfii.c,v 1.31 2023/10/05 21:41:00 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mfii.c,v 1.32 2024/02/13 14:56:52 msaitoh Exp $");
 
 #include "bio.h"
 
@@ -440,6 +440,7 @@ static void		mfii_put_ccb(struct mfii_so
 static int		mfii_init_ccb(struct mfii_softc *);
 static void		mfii_scrub_ccb(struct mfii_ccb *);
 
+static int		mfii_reset_hard(struct mfii_softc *);
 static int		mfii_transition_firmware(struct mfii_softc *);
 static int		mfii_initialise_firmware(struct mfii_softc *);
 static int		mfii_get_info(struct mfii_softc *);
@@ -1489,11 +1490,58 @@ mfii_aen_unregister(struct mfii_softc *s
 	/* XXX */
 }
 
+int
+mfii_reset_hard(struct mfii_softc *sc)
+{
+	uint16_t		i;
+	/* Hard-reset the adapter; returns 0 on success, 1 on failure. */
+	mfii_write(sc, MFI_OSTS, 0);
+
+	/* enable diagnostic register: FLUSH, then keys 1-6, in order */
+	mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_FLUSH);
+	mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_1);
+	mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_2);
+	mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_3);
+	mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_4);
+	mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_5);
+	mfii_write(sc, MPII_WRITESEQ, MPII_WRITESEQ_6);
+	/* delay() counts microseconds (cf. the 240 ms wait below) */
+	delay(100);
+	/* give up unless the DWRE bit is now set in HOSTDIAG */
+	if ((mfii_read(sc, MPII_HOSTDIAG) & MPII_HOSTDIAG_DWRE) == 0) {
+		aprint_error_dev(sc->sc_dev,
+		    "failed to enable diagnostic read/write\n");
+		return(1);
+	}
+
+	/* reset ioc by writing RESET_ADAPTER to HOSTDIAG */
+	mfii_write(sc, MPII_HOSTDIAG, MPII_HOSTDIAG_RESET_ADAPTER);
+
+	/* 240 milliseconds before the first poll */
+	delay(240000);
+	/* poll every 10 ms, up to 30000 times, for the reset bit to clear */
+	for (i = 0; i < 30000; i++) {
+		if ((mfii_read(sc, MPII_HOSTDIAG) &
+		    MPII_HOSTDIAG_RESET_ADAPTER) == 0)
+			break;
+		delay(10000);
+	}
+	if (i >= 30000) {
+		aprint_error_dev(sc->sc_dev, "failed to reset device\n");
+		return (1);
+	}
+
+	/* disable diagnostic register (skipped if the reset timed out) */
+	mfii_write(sc, MPII_WRITESEQ, 0xff);
+
+	return(0);
+}
+
 static int
 mfii_transition_firmware(struct mfii_softc *sc)
 {
 	int32_t			fw_state, cur_state;
-	int			max_wait, i;
+	int			max_wait, i, reset_on_fault = 1;
 
 	fw_state = mfii_fw_state(sc) & MFI_STATE_MASK;
 
@@ -1501,8 +1549,19 @@ mfii_transition_firmware(struct mfii_sof
 		cur_state = fw_state;
 		switch (fw_state) {
 		case MFI_STATE_FAULT:
-			printf("%s: firmware fault\n", DEVNAME(sc));
-			return (1);
+			if (!reset_on_fault) {
+				aprint_error_dev(sc->sc_dev,
+				    "firmware fault\n");
+				return (1);
+			}
+			aprint_verbose_dev(sc->sc_dev,
+			    "firmware fault; attempting full device reset, "
+			    "this can take some time\n");
+			if (mfii_reset_hard(sc))
+				return (1);
+			max_wait = 20;
+			reset_on_fault = 0;
+			break;
 		case MFI_STATE_WAIT_HANDSHAKE:
 			mfii_write(sc, MFI_SKINNY_IDB,
 			    MFI_INIT_CLEAR_HANDSHAKE);
@@ -1512,17 +1571,22 @@ mfii_transition_firmware(struct mfii_sof
 			mfii_write(sc, MFI_SKINNY_IDB, MFI_INIT_READY);
 			max_wait = 10;
 			break;
-		case MFI_STATE_UNDEFINED:
 		case MFI_STATE_BB_INIT:
-			max_wait = 2;
+			max_wait = 20;
 			break;
+		case MFI_STATE_UNDEFINED:
 		case MFI_STATE_FW_INIT:
+		case MFI_STATE_FW_INIT_2:
 		case MFI_STATE_DEVICE_SCAN:
 		case MFI_STATE_FLUSH_CACHE:
-			max_wait = 20;
+			max_wait = 40;
+			break;
+		case MFI_STATE_BOOT_MESSAGE_PENDING:
+			mfii_write(sc, MFI_SKINNY_IDB, MFI_INIT_HOTPLUG);
+			max_wait = 10;
 			break;
 		default:
-			printf("%s: unknown firmware state %d\n",
+			printf("%s: unknown firmware state %#x\n",
 			    DEVNAME(sc), fw_state);
 			return (1);
 		}
@@ -1537,6 +1601,10 @@ mfii_transition_firmware(struct mfii_sof
 			printf("%s: firmware stuck in state %#x\n",
 			    DEVNAME(sc), fw_state);
 			return (1);
+		} else {
+			DPRINTF("%s: firmware state change %#x -> %#x after "
+			    "%d iterations\n",
+			    DEVNAME(sc), cur_state, fw_state, i);
 		}
 	}
 

Reply via email to