On 31/12/2010, at 3:51 AM, Mike Belopuhov wrote:
> On Fri, Dec 24, 2010 at 16:09 +1000, David Gwynne wrote:
>> i can reliably produce a situation where an io on a disk attached
>> to mpii(4) never completes. this implements timeouts on scsi io so
>> we can recover from this situation.
>>
>> ok?
>>
>
> although, you've already committed the change, i have two questions
> regarding this. please find them inline.
cool :) answers inline too.
>
>> Index: mpii.c
>> ===================================================================
>> RCS file: /cvs/src/sys/dev/pci/mpii.c,v
>> retrieving revision 1.35
>> diff -u -p -r1.35 mpii.c
>> --- mpii.c 23 Aug 2010 00:53:36 -0000 1.35
>> +++ mpii.c 24 Dec 2010 06:04:38 -0000
>> @@ -4448,6 +4466,7 @@ mpii_scsi_cmd(struct scsi_xfer *xs)
>> DNPRINTF(MPII_D_CMD, "%s: Offset0: 0x%02x\n", DEVNAME(sc),
>> io->sgl_offset0);
>>
>> + timeout_set(&xs->stimeout, mpii_scsi_cmd_tmo, ccb);
>> if (xs->flags & SCSI_POLL) {
>> if (mpii_poll(sc, ccb) != 0) {
>> xs->error = XS_DRIVER_STUFFUP;
>> @@ -4459,10 +4478,66 @@ mpii_scsi_cmd(struct scsi_xfer *xs)
>> DNPRINTF(MPII_D_CMD, "%s: mpii_scsi_cmd(): opcode: %02x "
>> "datalen: %d\n", DEVNAME(sc), xs->cmd->opcode, xs->datalen);
>>
>> + timeout_add_msec(&xs->stimeout, xs->timeout);
>> mpii_start(sc, ccb);
>> }
>>
>> void
>> +mpii_scsi_cmd_tmo(void *xccb)
>> +{
>> + struct mpii_ccb *ccb = xccb;
>> + struct mpii_softc *sc = ccb->ccb_sc;
>> +
>> + printf("%s: mpii_scsi_cmd_tmo\n", DEVNAME(sc));
>> +
>> + mtx_enter(&sc->sc_ccb_mtx);
>> + if (ccb->ccb_state == MPII_CCB_QUEUED) {
>> + ccb->ccb_state = MPII_CCB_TIMEOUT;
>> + SLIST_INSERT_HEAD(&sc->sc_ccb_tmos, ccb, ccb_link);
>> + }
>> + mtx_leave(&sc->sc_ccb_mtx);
>> +
>> + scsi_ioh_add(&sc->sc_ccb_tmo_handler);
>> +}
>> +
>> +void
>> +mpii_scsi_cmd_tmo_handler(void *cookie, void *io)
>> +{
>> + struct mpii_softc *sc = cookie;
>> + struct mpii_ccb *tccb = io;
>> + struct mpii_ccb *ccb;
>> + struct mpii_msg_scsi_task_request *stq;
>> +
>> + mtx_enter(&sc->sc_ccb_mtx);
>> + ccb = SLIST_FIRST(&sc->sc_ccb_tmos);
>> + if (ccb != NULL) {
>> + SLIST_REMOVE_HEAD(&sc->sc_ccb_tmos, ccb_link);
>> + ccb->ccb_state = MPII_CCB_QUEUED;
>> + }
>> + /* should remove any other ccbs for the same dev handle */
>> + mtx_leave(&sc->sc_ccb_mtx);
>> +
>> + if (ccb == NULL) {
>> + scsi_io_put(&sc->sc_iopool, tccb);
>> + return;
>> + }
>> +
>> + stq = tccb->ccb_cmd;
>> + stq->function = MPII_FUNCTION_SCSI_TASK_MGMT;
>> + stq->task_type = MPII_SCSI_TASK_TARGET_RESET;
>> + stq->dev_handle = htole16(ccb->ccb_dev_handle);
>> +
>
> why do you issue 'target reset' instead of 'abort task' here?
thats what solaris and linux do.
>
>> + tccb->ccb_done = mpii_scsi_cmd_tmo_done;
>> + mpii_start(sc, tccb);
>> +}
>> +
>> +void
>> +mpii_scsi_cmd_tmo_done(struct mpii_ccb *tccb)
>> +{
>> + mpii_scsi_cmd_tmo_handler(tccb->ccb_sc, tccb);
>> +}
>> +
>
> why do you call this function again from here?
xs timeouts put the xs on a list to be serviced. the io handler services that
list. the done function calling the handler again is the way it pulls the next
one off the list.
dlg