Module Name: src
Committed By: buhrow
Date: Tue Apr 1 23:57:54 UTC 2014
Modified Files:
src/sys/dev/ic: mpt_netbsd.c mpt_netbsd.h
Log Message:
Checking in changes to improve error handling. Specifically:
- if commands time out, clear the queues to the card and perform a soft
reset on the LSI hardware, since the LSI firmware does not recover
gracefully at all when these timeouts occur.
- Recover gracefully from more kinds of errors using the same recovery
mechanism listed above.
Also, implement mpt_ioctl() to handle bus reset requests from scsictl(8).
To generate a diff of this commit:
cvs rdiff -u -r1.19 -r1.20 src/sys/dev/ic/mpt_netbsd.c
cvs rdiff -u -r1.10 -r1.11 src/sys/dev/ic/mpt_netbsd.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/dev/ic/mpt_netbsd.c
diff -u src/sys/dev/ic/mpt_netbsd.c:1.19 src/sys/dev/ic/mpt_netbsd.c:1.20
--- src/sys/dev/ic/mpt_netbsd.c:1.19 Sun Sep 23 01:13:21 2012
+++ src/sys/dev/ic/mpt_netbsd.c Tue Apr 1 23:57:54 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: mpt_netbsd.c,v 1.19 2012/09/23 01:13:21 chs Exp $ */
+/* $NetBSD: mpt_netbsd.c,v 1.20 2014/04/01 23:57:54 buhrow Exp $ */
/*
* Copyright (c) 2003 Wasabi Systems, Inc.
@@ -77,22 +77,28 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mpt_netbsd.c,v 1.19 2012/09/23 01:13:21 chs Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mpt_netbsd.c,v 1.20 2014/04/01 23:57:54 buhrow Exp $");
#include <dev/ic/mpt.h> /* pulls in all headers */
+#include <sys/scsiio.h>
static int mpt_poll(mpt_softc_t *, struct scsipi_xfer *, int);
static void mpt_timeout(void *);
+static void mpt_restart(mpt_softc_t *, request_t *);
static void mpt_done(mpt_softc_t *, uint32_t);
+static int mpt_drain_queue(mpt_softc_t *);
static void mpt_run_xfer(mpt_softc_t *, struct scsipi_xfer *);
static void mpt_set_xfer_mode(mpt_softc_t *, struct scsipi_xfer_mode *);
static void mpt_get_xfer_mode(mpt_softc_t *, struct scsipi_periph *);
static void mpt_ctlop(mpt_softc_t *, void *vmsg, uint32_t);
static void mpt_event_notify_reply(mpt_softc_t *, MSG_EVENT_NOTIFY_REPLY *);
+static void mpt_bus_reset(mpt_softc_t *);
static void mpt_scsipi_request(struct scsipi_channel *,
scsipi_adapter_req_t, void *);
static void mpt_minphys(struct buf *);
+static int mpt_ioctl(struct scsipi_channel *, u_long, void *, int,
+ struct proc *);
/*
* XXX - this assumes the device_private() of the attachement starts with
@@ -121,6 +127,7 @@ mpt_scsipi_attach(mpt_softc_t *mpt)
adapt->adapt_max_periph = maxq - 2;
adapt->adapt_request = mpt_scsipi_request;
adapt->adapt_minphys = mpt_minphys;
+ adapt->adapt_ioctl = mpt_ioctl;
/* Fill in the scsipi_channel. */
memset(chan, 0, sizeof(*chan));
@@ -138,7 +145,8 @@ mpt_scsipi_attach(mpt_softc_t *mpt)
chan->chan_ntargets = mpt->mpt_max_devices;
chan->chan_id = mpt->mpt_ini_id;
- (void) config_found(mpt->sc_dev, &mpt->sc_channel, scsiprint);
+/*Save the output of the config so we can rescan the bus in case of errors*/
+ mpt->sc_scsibus_dv = config_found(mpt->sc_dev, &mpt->sc_channel, scsiprint);
}
int
@@ -303,26 +311,11 @@ mpt_intr(void *arg)
{
mpt_softc_t *mpt = arg;
int nrepl = 0;
- uint32_t reply;
if ((mpt_read(mpt, MPT_OFFSET_INTR_STATUS) & MPT_INTR_REPLY_READY) == 0)
return (0);
- reply = mpt_pop_reply_queue(mpt);
- while (reply != MPT_REPLY_EMPTY) {
- nrepl++;
- if (mpt->verbose > 1) {
- if ((reply & MPT_CONTEXT_REPLY) != 0) {
- /* Address reply; IOC has something to say */
- mpt_print_reply(MPT_REPLY_PTOV(mpt, reply));
- } else {
- /* Context reply; all went well */
- mpt_prt(mpt, "context %u reply OK", reply);
- }
- }
- mpt_done(mpt, reply);
- reply = mpt_pop_reply_queue(mpt);
- }
+nrepl = mpt_drain_queue(mpt);
return (nrepl != 0);
}
@@ -357,13 +350,20 @@ static void
mpt_timeout(void *arg)
{
request_t *req = arg;
- struct scsipi_xfer *xs = req->xfer;
- struct scsipi_periph *periph = xs->xs_periph;
- mpt_softc_t *mpt = DEV_TO_MPT(
- periph->periph_channel->chan_adapter->adapt_dev);
- uint32_t oseq;
- int s;
-
+ struct scsipi_xfer *xs;
+ struct scsipi_periph *periph;
+ mpt_softc_t *mpt;
+ uint32_t oseq;
+ int s, nrepl = 0;
+
+if (req->xfer == NULL) {
+ printf("mpt_timeout: NULL xfer for request index 0x%x, sequenc 0x%x\n",
+ req->index, req->sequence);
+ return;
+ }
+ xs = req->xfer;
+ periph = xs->xs_periph;
+ mpt = (void *) periph->periph_channel->chan_adapter->adapt_dev;
scsipi_printaddr(periph);
printf("command timeout\n");
@@ -373,11 +373,28 @@ mpt_timeout(void *arg)
mpt->timeouts++;
if (mpt_intr(mpt)) {
if (req->sequence != oseq) {
+ mpt->success ++;
mpt_prt(mpt, "recovered from command timeout");
splx(s);
return;
}
}
+
+ /*
+ *Ensure the IOC is really done giving us data since it appears it can
+ *sometimes fail to give us interrupts under heavy load.
+ */
+ nrepl = mpt_drain_queue(mpt);
+ if (nrepl ) {
+ mpt_prt(mpt, "mpt_timeout: recovered %d commands",nrepl);
+ }
+
+ if (req->sequence != oseq) {
+ mpt->success ++;
+ splx(s);
+ return;
+ }
+
mpt_prt(mpt,
"timeout on request index = 0x%x, seq = 0x%08x",
req->index, req->sequence);
@@ -390,14 +407,83 @@ mpt_timeout(void *arg)
if (mpt->verbose > 1)
mpt_print_scsi_io_request((MSG_SCSI_IO_REQUEST *)req->req_vbuf);
- /* XXX WHAT IF THE IOC IS STILL USING IT?? */
- req->xfer = NULL;
- mpt_free_request(mpt, req);
-
xs->error = XS_TIMEOUT;
- scsipi_done(xs);
+ splx(s);
+ mpt_restart(mpt, req);
+}
+
+static void
+mpt_restart(mpt_softc_t *mpt, request_t *req0)
+{
+ int i, s, nreq;
+ request_t *req;
+ struct scsipi_xfer *xs;
+
+ /* first, reset the IOC, leaving stopped so all requests are idle */
+ if (mpt_soft_reset(mpt) != MPT_OK) {
+ mpt_prt(mpt, "soft reset failed");
+ /* don't try a hard reset since this mangles the PCI configuration registers */
+ return;
+ }
+ /* freeze the channel so scsipi doesn't queue more commands */
+ scsipi_channel_freeze(&mpt->sc_channel, 1);
+
+ /* return all pending requests to scsipi and de-allocate them */
+ s = splbio();
+ nreq = 0;
+ for (i = 0; i < MPT_MAX_REQUESTS(mpt); i++) {
+ req = &mpt->request_pool[i];
+ xs = req->xfer;
+ if (xs != NULL) {
+ if (xs->datalen != 0)
+ bus_dmamap_unload(mpt->sc_dmat, req->dmap);
+ req->xfer = NULL;
+ callout_stop(&xs->xs_callout);
+ if (req != req0) {
+ nreq++;
+ xs->error = XS_REQUEUE;
+ }
+ scsipi_done(xs);
+ /* don't really need to mpt_free_request() since mpt_init() below will free all requests anyway */
+ mpt_free_request(mpt, req);
+ }
+ }
splx(s);
+ if (nreq > 0)
+ mpt_prt(mpt, "re-queued %d requests", nreq);
+
+ /* re-initialize the IOC (which restarts it) */
+ if (mpt_init(mpt, MPT_DB_INIT_HOST) == 0)
+ mpt_prt(mpt, "restart succeeded");
+ /* else error message already printed */
+
+ /* thaw the channel, causing scsipi to re-queue the commands */
+ scsipi_channel_thaw(&mpt->sc_channel, 1);
+}
+
+static
+int mpt_drain_queue(mpt_softc_t *mpt)
+{
+ int nrepl = 0;
+ uint32_t reply;
+
+ reply = mpt_pop_reply_queue(mpt);
+ while (reply != MPT_REPLY_EMPTY) {
+ nrepl++;
+ if (mpt->verbose > 1) {
+ if ((reply & MPT_CONTEXT_REPLY) != 0) {
+ /* Address reply; IOC has something to say */
+ mpt_print_reply(MPT_REPLY_PTOV(mpt, reply));
+ } else {
+ /* Context reply; all went well */
+ mpt_prt(mpt, "context %u reply OK", reply);
+ }
+ }
+ mpt_done(mpt, reply);
+ reply = mpt_pop_reply_queue(mpt);
+ }
+ return (nrepl);
}
static void
@@ -409,6 +495,7 @@ mpt_done(mpt_softc_t *mpt, uint32_t repl
request_t *req;
MSG_REQUEST_HEADER *mpt_req;
MSG_SCSI_IO_REPLY *mpt_reply;
+ int restart = 0; /*nonzero if we need to restart the IOC*/
if (__predict_true((reply & MPT_CONTEXT_REPLY) == 0)) {
/* context reply (ok) */
@@ -468,6 +555,8 @@ mpt_done(mpt_softc_t *mpt, uint32_t repl
if (__predict_false(mpt_req->Function == MPI_FUNCTION_SCSI_TASK_MGMT)) {
if (mpt->verbose > 1)
mpt_prt(mpt, "mpt_done: TASK MGMT");
+ KASSERT(req == mpt->mngt_req);
+ mpt->mngt_req = NULL;
goto done;
}
@@ -544,9 +633,10 @@ mpt_done(mpt_softc_t *mpt, uint32_t repl
}
xs->status = mpt_reply->SCSIStatus;
- switch (le16toh(mpt_reply->IOCStatus)) {
+ switch ((le16toh(mpt_reply->IOCStatus) & MPI_IOCSTATUS_MASK)) {
case MPI_IOCSTATUS_SCSI_DATA_OVERRUN:
xs->error = XS_DRIVER_STUFFUP;
+ mpt_prt(mpt,"mpt_done: IOC overrun!");
break;
case MPI_IOCSTATUS_SCSI_DATA_UNDERRUN:
@@ -605,30 +695,56 @@ mpt_done(mpt_softc_t *mpt, uint32_t repl
case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH:
xs->error = XS_DRIVER_STUFFUP;
+ mpt_prt(mpt,"mpt_done: IOC SCSI residual mismatch!");
+ restart = 1;
break;
case MPI_IOCSTATUS_SCSI_TASK_TERMINATED:
/* XXX What should we do here? */
+ mpt_prt(mpt,"mpt_done: IOC SCSI task terminated!");
+ restart = 1;
break;
case MPI_IOCSTATUS_SCSI_TASK_MGMT_FAILED:
/* XXX */
xs->error = XS_DRIVER_STUFFUP;
+ mpt_prt(mpt,"mpt_done: IOC SCSI task failed!");
+ restart = 1;
break;
case MPI_IOCSTATUS_SCSI_IOC_TERMINATED:
/* XXX */
xs->error = XS_DRIVER_STUFFUP;
+ mpt_prt(mpt,"mpt_done: IOC task terminated!");
+ restart = 1;
break;
case MPI_IOCSTATUS_SCSI_EXT_TERMINATED:
/* XXX This is a bus-reset */
xs->error = XS_DRIVER_STUFFUP;
+ mpt_prt(mpt,"mpt_done: IOC SCSI bus reset!");
+ restart = 1;
+ break;
+
+ case MPI_IOCSTATUS_SCSI_PROTOCOL_ERROR:
+ /*
+ *FreeBSD and Linux indicate this is a phase error between
+ *the IOC and the drive itself.
+ *When this happens, the IOC becomes unhappy and stops processing
+ *all transactions. Call mpt_timeout which knows how to
+ *get the IOC back on its feet.
+ */
+ mpt_prt(mpt,"mpt_done: IOC indicates protocol error -- recovering...");
+ xs->error = XS_TIMEOUT;
+ restart = 1;
+
break;
default:
/* XXX unrecognized HBA error */
xs->error = XS_DRIVER_STUFFUP;
+ mpt_prt(mpt,"mpt_done: IOC returned unknown code: 0x%x",le16toh(mpt_reply->IOCStatus));
+ restart = 1;
break;
}
@@ -645,6 +761,11 @@ mpt_done(mpt_softc_t *mpt, uint32_t repl
}
done:
+ if (le16toh(mpt_reply->IOCStatus) & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
+ mpt_prt(mpt,"mpt_done: IOC has error - logging...\n");
+ mpt_ctlop(mpt, mpt_reply, reply);
+ }
+
/* If IOC done with this requeset, free it up. */
if (mpt_reply == NULL || (mpt_reply->MsgFlags & 0x80) == 0)
mpt_free_request(mpt, req);
@@ -655,6 +776,11 @@ mpt_done(mpt_softc_t *mpt, uint32_t repl
if (xs != NULL)
scsipi_done(xs);
+
+ if (restart) {
+ mpt_prt(mpt,"mpt_done: IOC fatal error: restarting...");
+ mpt_restart(mpt, NULL);
+ }
}
static void
@@ -928,6 +1054,12 @@ mpt_run_xfer(mpt_softc_t *mpt, struct sc
if (mpt->verbose > 1)
mpt_print_scsi_io_request(mpt_req);
+ if (xs->timeout == 0) {
+ mpt_prt(mpt,"mpt_run_xfer: no timeout specified for request: 0x%x\n",
+ req->index);
+ xs->timeout = 500;
+ }
+
s = splbio();
if (__predict_true((xs->xs_control & XS_CTL_POLL) == 0))
callout_reset(&xs->xs_callout,
@@ -1340,7 +1472,44 @@ mpt_event_notify_reply(mpt_softc_t *mpt,
}
}
-/* XXXJRT mpt_bus_reset() */
+static void
+mpt_bus_reset(mpt_softc_t *mpt)
+{
+ request_t *req;
+ MSG_SCSI_TASK_MGMT *mngt_req;
+ int s;
+
+ s = splbio();
+ if (mpt->mngt_req) {
+ /* request already queued; can't do more */
+ splx(s);
+ return;
+ }
+ req = mpt_get_request(mpt);
+ if (__predict_false(req == NULL)) {
+ mpt_prt(mpt, "no mngt request\n");
+ splx(s);
+ return;
+ }
+ mpt->mngt_req = req;
+ splx(s);
+ mngt_req = req->req_vbuf;
+ memset(mngt_req, 0, sizeof(*mngt_req));
+ mngt_req->Function = MPI_FUNCTION_SCSI_TASK_MGMT;
+ mngt_req->Bus = mpt->bus;
+ mngt_req->TargetID = 0;
+ mngt_req->ChainOffset = 0;
+ mngt_req->TaskType = MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS;
+ mngt_req->Reserved1 = 0;
+ mngt_req->MsgFlags =
+ mpt->is_fc ? MPI_SCSITASKMGMT_MSGFLAGS_LIP_RESET_OPTION : 0;
+ mngt_req->MsgContext = req->index;
+ mngt_req->TaskMsgContext = 0;
+ s = splbio();
+ mpt_send_handshake_cmd(mpt, sizeof(*mngt_req), mngt_req);
+ /*mpt_enable_ints(mpt);*/
+ splx(s);
+}
/*****************************************************************************
* SCSI interface routines
@@ -1382,3 +1551,23 @@ mpt_minphys(struct buf *bp)
bp->b_bcount = MPT_MAX_XFER;
minphys(bp);
}
+
+static int
+mpt_ioctl(struct scsipi_channel *chan, u_long cmd, void *arg,
+ int flag, struct proc *p)
+{
+ mpt_softc_t *mpt;
+ int s;
+
+ mpt = device_private(chan->chan_adapter->adapt_dev);
+ switch (cmd) {
+ case SCBUSIORESET:
+ mpt_bus_reset(mpt);
+ s = splbio();
+ mpt_intr(mpt);
+ splx(s);
+ return(0);
+ default:
+ return (ENOTTY);
+ }
+}
Index: src/sys/dev/ic/mpt_netbsd.h
diff -u src/sys/dev/ic/mpt_netbsd.h:1.10 src/sys/dev/ic/mpt_netbsd.h:1.11
--- src/sys/dev/ic/mpt_netbsd.h:1.10 Sun Mar 18 21:05:21 2012
+++ src/sys/dev/ic/mpt_netbsd.h Tue Apr 1 23:57:54 2014
@@ -1,4 +1,4 @@
-/* $NetBSD: mpt_netbsd.h,v 1.10 2012/03/18 21:05:21 martin Exp $ */
+/* $NetBSD: mpt_netbsd.h,v 1.11 2014/04/01 23:57:54 buhrow Exp $ */
/*
* Copyright (c) 2003 Wasabi Systems, Inc.
@@ -230,9 +230,11 @@ typedef struct mpt_softc {
/* SCSIPI and software management */
request_t *request_pool;
SLIST_HEAD(req_queue, req_entry) request_free_list;
+ request_t *mngt_req;
struct scsipi_adapter sc_adapter;
struct scsipi_channel sc_channel;
+ device_t sc_scsibus_dv; /*So we can rescan in case of errors*/
uint32_t sequence; /* sequence number */
uint32_t timeouts; /* timeout count */