This is great, but the real question is: why does the I/O get jammed?

On Fri, Dec 24, 2010 at 04:09:23PM +1000, David Gwynne wrote:
> I can reliably produce a situation where an I/O on a disk attached
> to mpii(4) never completes. This implements timeouts on SCSI I/O so
> we can recover from this situation.
> 
> ok?
> 
> Index: mpii.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/mpii.c,v
> retrieving revision 1.35
> diff -u -p -r1.35 mpii.c
> --- mpii.c    23 Aug 2010 00:53:36 -0000      1.35
> +++ mpii.c    24 Dec 2010 06:04:38 -0000
> @@ -1757,7 +1757,8 @@ struct mpii_ccb {
>       volatile enum {
>               MPII_CCB_FREE,
>               MPII_CCB_READY,
> -             MPII_CCB_QUEUED
> +             MPII_CCB_QUEUED,
> +             MPII_CCB_TIMEOUT
>       }                       ccb_state;
>  
>       void                    (*ccb_done)(struct mpii_ccb *);
> @@ -1822,6 +1823,15 @@ struct mpii_softc {
>       struct mpii_ccb_list    sc_ccb_free;
>       struct mutex            sc_ccb_free_mtx;
>  
> +     struct mutex            sc_ccb_mtx;
> +                             /*
> +                              * this protects the ccb state and list entry
> +                              * between mpii_scsi_cmd and scsidone.
> +                              */
> +
> +     struct mpii_ccb_list    sc_ccb_tmos;
> +     struct scsi_iohandler   sc_ccb_tmo_handler;
> +
>       struct scsi_iopool      sc_iopool;
>  
>       struct mpii_dmamem      *sc_requests;
> @@ -1894,6 +1904,10 @@ int            mpii_alloc_queues(struct mpii_softc
>  void         mpii_push_reply(struct mpii_softc *, struct mpii_rcb *);
>  void         mpii_push_replies(struct mpii_softc *);
>  
> +void         mpii_scsi_cmd_tmo(void *);
> +void         mpii_scsi_cmd_tmo_handler(void *, void *);
> +void         mpii_scsi_cmd_tmo_done(struct mpii_ccb *);
> +
>  int          mpii_alloc_dev(struct mpii_softc *);
>  int          mpii_insert_dev(struct mpii_softc *, struct mpii_device *);
>  int          mpii_remove_dev(struct mpii_softc *, struct mpii_device *);
> @@ -4035,7 +4049,11 @@ mpii_alloc_ccbs(struct mpii_softc *sc)
>       int                     i;
>  
>       SLIST_INIT(&sc->sc_ccb_free);
> +     SLIST_INIT(&sc->sc_ccb_tmos);
>       mtx_init(&sc->sc_ccb_free_mtx, IPL_BIO);
> +     mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
> +     scsi_ioh_set(&sc->sc_ccb_tmo_handler, &sc->sc_iopool,
> +         mpii_scsi_cmd_tmo_handler, sc);
>  
>       sc->sc_ccbs = malloc(sizeof(*ccb) * (sc->sc_request_depth-1),
>           M_DEVBUF, M_NOWAIT | M_ZERO);
> @@ -4448,6 +4466,7 @@ mpii_scsi_cmd(struct scsi_xfer *xs)
>       DNPRINTF(MPII_D_CMD, "%s:  Offset0: 0x%02x\n", DEVNAME(sc),
>           io->sgl_offset0);
>  
> +     timeout_set(&xs->stimeout, mpii_scsi_cmd_tmo, ccb);
>       if (xs->flags & SCSI_POLL) {
>               if (mpii_poll(sc, ccb) != 0) {
>                       xs->error = XS_DRIVER_STUFFUP;
> @@ -4459,10 +4478,66 @@ mpii_scsi_cmd(struct scsi_xfer *xs)
>       DNPRINTF(MPII_D_CMD, "%s:    mpii_scsi_cmd(): opcode: %02x "
>           "datalen: %d\n", DEVNAME(sc), xs->cmd->opcode, xs->datalen);
>  
> +     timeout_add_msec(&xs->stimeout, xs->timeout);
>       mpii_start(sc, ccb);
>  }
>  
>  void
> +mpii_scsi_cmd_tmo(void *xccb)
> +{
> +     struct mpii_ccb         *ccb = xccb;
> +     struct mpii_softc       *sc = ccb->ccb_sc;
> +
> +     printf("%s: mpii_scsi_cmd_tmo\n", DEVNAME(sc));
> +
> +     mtx_enter(&sc->sc_ccb_mtx);
> +     if (ccb->ccb_state == MPII_CCB_QUEUED) {
> +             ccb->ccb_state = MPII_CCB_TIMEOUT;
> +             SLIST_INSERT_HEAD(&sc->sc_ccb_tmos, ccb, ccb_link);
> +     }
> +     mtx_leave(&sc->sc_ccb_mtx);
> +
> +     scsi_ioh_add(&sc->sc_ccb_tmo_handler);
> +}
> +
> +void
> +mpii_scsi_cmd_tmo_handler(void *cookie, void *io)
> +{
> +     struct mpii_softc                       *sc = cookie;
> +     struct mpii_ccb                         *tccb = io;
> +     struct mpii_ccb                         *ccb;
> +     struct mpii_msg_scsi_task_request       *stq;
> +
> +     mtx_enter(&sc->sc_ccb_mtx);
> +     ccb = SLIST_FIRST(&sc->sc_ccb_tmos);
> +     if (ccb != NULL) {
> +             SLIST_REMOVE_HEAD(&sc->sc_ccb_tmos, ccb_link);
> +             ccb->ccb_state = MPII_CCB_QUEUED;
> +     }
> +     /* should remove any other ccbs for the same dev handle */
> +     mtx_leave(&sc->sc_ccb_mtx);
> +
> +     if (ccb == NULL) {
> +             scsi_io_put(&sc->sc_iopool, tccb);
> +             return;
> +     }
> +
> +     stq = tccb->ccb_cmd;
> +     stq->function = MPII_FUNCTION_SCSI_TASK_MGMT;
> +     stq->task_type = MPII_SCSI_TASK_TARGET_RESET;
> +     stq->dev_handle = htole16(ccb->ccb_dev_handle);
> +
> +     tccb->ccb_done = mpii_scsi_cmd_tmo_done;
> +     mpii_start(sc, tccb);
> +}
> +
> +void
> +mpii_scsi_cmd_tmo_done(struct mpii_ccb *tccb)
> +{
> +     mpii_scsi_cmd_tmo_handler(tccb->ccb_sc, tccb);
> +}
> +
> +void
>  mpii_scsi_cmd_done(struct mpii_ccb *ccb)
>  {
>       struct mpii_msg_scsi_io_error   *sie;
> @@ -4470,6 +4545,14 @@ mpii_scsi_cmd_done(struct mpii_ccb *ccb)
>       struct scsi_xfer        *xs = ccb->ccb_cookie;
>       struct mpii_ccb_bundle  *mcb = ccb->ccb_cmd;
>       bus_dmamap_t            dmap = ccb->ccb_dmamap;
> +
> +     timeout_del(&xs->stimeout);
> +     mtx_enter(&sc->sc_ccb_mtx);
> +     if (ccb->ccb_state == MPII_CCB_TIMEOUT)
> +             SLIST_REMOVE(&sc->sc_ccb_tmos, ccb, mpii_ccb, ccb_link);
> +
> +     ccb->ccb_state = MPII_CCB_READY;
> +     mtx_leave(&sc->sc_ccb_mtx);
>  
>       if (xs->datalen != 0) {
>               bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,

Reply via email to