I can reliably produce a situation where an I/O on a disk attached to mpii(4) never completes. This diff implements timeouts on SCSI I/O so that we can recover from this situation.
ok? Index: mpii.c =================================================================== RCS file: /cvs/src/sys/dev/pci/mpii.c,v retrieving revision 1.35 diff -u -p -r1.35 mpii.c --- mpii.c 23 Aug 2010 00:53:36 -0000 1.35 +++ mpii.c 24 Dec 2010 06:04:38 -0000 @@ -1757,7 +1757,8 @@ struct mpii_ccb { volatile enum { MPII_CCB_FREE, MPII_CCB_READY, - MPII_CCB_QUEUED + MPII_CCB_QUEUED, + MPII_CCB_TIMEOUT } ccb_state; void (*ccb_done)(struct mpii_ccb *); @@ -1822,6 +1823,15 @@ struct mpii_softc { struct mpii_ccb_list sc_ccb_free; struct mutex sc_ccb_free_mtx; + struct mutex sc_ccb_mtx; + /* + * this protects the ccb state and list entry + * between mpii_scsi_cmd and scsidone. + */ + + struct mpii_ccb_list sc_ccb_tmos; + struct scsi_iohandler sc_ccb_tmo_handler; + struct scsi_iopool sc_iopool; struct mpii_dmamem *sc_requests; @@ -1894,6 +1904,10 @@ int mpii_alloc_queues(struct mpii_softc void mpii_push_reply(struct mpii_softc *, struct mpii_rcb *); void mpii_push_replies(struct mpii_softc *); +void mpii_scsi_cmd_tmo(void *); +void mpii_scsi_cmd_tmo_handler(void *, void *); +void mpii_scsi_cmd_tmo_done(struct mpii_ccb *); + int mpii_alloc_dev(struct mpii_softc *); int mpii_insert_dev(struct mpii_softc *, struct mpii_device *); int mpii_remove_dev(struct mpii_softc *, struct mpii_device *); @@ -4035,7 +4049,11 @@ mpii_alloc_ccbs(struct mpii_softc *sc) int i; SLIST_INIT(&sc->sc_ccb_free); + SLIST_INIT(&sc->sc_ccb_tmos); mtx_init(&sc->sc_ccb_free_mtx, IPL_BIO); + mtx_init(&sc->sc_ccb_mtx, IPL_BIO); + scsi_ioh_set(&sc->sc_ccb_tmo_handler, &sc->sc_iopool, + mpii_scsi_cmd_tmo_handler, sc); sc->sc_ccbs = malloc(sizeof(*ccb) * (sc->sc_request_depth-1), M_DEVBUF, M_NOWAIT | M_ZERO); @@ -4448,6 +4466,7 @@ mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, "%s: Offset0: 0x%02x\n", DEVNAME(sc), io->sgl_offset0); + timeout_set(&xs->stimeout, mpii_scsi_cmd_tmo, ccb); if (xs->flags & SCSI_POLL) { if (mpii_poll(sc, ccb) != 0) { xs->error = XS_DRIVER_STUFFUP; @@ -4459,10 +4478,66 @@ 
mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, "%s: mpii_scsi_cmd(): opcode: %02x " "datalen: %d\n", DEVNAME(sc), xs->cmd->opcode, xs->datalen); + timeout_add_msec(&xs->stimeout, xs->timeout); mpii_start(sc, ccb); } void +mpii_scsi_cmd_tmo(void *xccb) +{ + struct mpii_ccb *ccb = xccb; + struct mpii_softc *sc = ccb->ccb_sc; + + printf("%s: mpii_scsi_cmd_tmo\n", DEVNAME(sc)); + + mtx_enter(&sc->sc_ccb_mtx); + if (ccb->ccb_state == MPII_CCB_QUEUED) { + ccb->ccb_state = MPII_CCB_TIMEOUT; + SLIST_INSERT_HEAD(&sc->sc_ccb_tmos, ccb, ccb_link); + } + mtx_leave(&sc->sc_ccb_mtx); + + scsi_ioh_add(&sc->sc_ccb_tmo_handler); +} + +void +mpii_scsi_cmd_tmo_handler(void *cookie, void *io) +{ + struct mpii_softc *sc = cookie; + struct mpii_ccb *tccb = io; + struct mpii_ccb *ccb; + struct mpii_msg_scsi_task_request *stq; + + mtx_enter(&sc->sc_ccb_mtx); + ccb = SLIST_FIRST(&sc->sc_ccb_tmos); + if (ccb != NULL) { + SLIST_REMOVE_HEAD(&sc->sc_ccb_tmos, ccb_link); + ccb->ccb_state = MPII_CCB_QUEUED; + } + /* should remove any other ccbs for the same dev handle */ + mtx_leave(&sc->sc_ccb_mtx); + + if (ccb == NULL) { + scsi_io_put(&sc->sc_iopool, tccb); + return; + } + + stq = tccb->ccb_cmd; + stq->function = MPII_FUNCTION_SCSI_TASK_MGMT; + stq->task_type = MPII_SCSI_TASK_TARGET_RESET; + stq->dev_handle = htole16(ccb->ccb_dev_handle); + + tccb->ccb_done = mpii_scsi_cmd_tmo_done; + mpii_start(sc, tccb); +} + +void +mpii_scsi_cmd_tmo_done(struct mpii_ccb *tccb) +{ + mpii_scsi_cmd_tmo_handler(tccb->ccb_sc, tccb); +} + +void mpii_scsi_cmd_done(struct mpii_ccb *ccb) { struct mpii_msg_scsi_io_error *sie; @@ -4470,6 +4545,14 @@ mpii_scsi_cmd_done(struct mpii_ccb *ccb) struct scsi_xfer *xs = ccb->ccb_cookie; struct mpii_ccb_bundle *mcb = ccb->ccb_cmd; bus_dmamap_t dmap = ccb->ccb_dmamap; + + timeout_del(&xs->stimeout); + mtx_enter(&sc->sc_ccb_mtx); + if (ccb->ccb_state == MPII_CCB_TIMEOUT) + SLIST_REMOVE(&sc->sc_ccb_tmos, ccb, mpii_ccb, ccb_link); + + ccb->ccb_state = MPII_CCB_READY; + 
mtx_leave(&sc->sc_ccb_mtx); if (xs->datalen != 0) { bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,