The SCSI ioctl reset path is smart enough to set the
tmf_in_progress flag when a user-requested reset is
processed, but it does not wait for IO that is already
in flight. This can result in lost IOs and hung
processes. We should wait a reasonable amount of time
for the outstanding IOs to complete, and fail the
reset request if they do not.

Signed-off-by: Lee Duncan <ldun...@suse.com>
---
 drivers/scsi/scsi_error.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 38942050b265..b964152611c3 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -57,6 +57,14 @@
 #define BUS_RESET_SETTLE_TIME   (10)
 #define HOST_RESET_SETTLE_TIME  (10)
 
+/*
+ * Time to wait for outstanding IOs when about to send
+ * a device reset, e.g. sg_reset. The msecs to wait must
+ * be a multiple of the msecs to wait per try.
+ */
+#define MSECS_PER_TRY_FOR_IO_ON_RESET  500
+#define MSECS_TO_WAIT_FOR_IO_ON_RESET  (MSECS_PER_TRY_FOR_IO_ON_RESET * 10)
+
 static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
 static int scsi_try_to_abort_cmd(struct scsi_host_template *,
                                 struct scsi_cmnd *);
@@ -2269,6 +2277,7 @@ void scsi_report_device_reset(struct Scsi_Host *shost, 
int channel, int target)
        struct request *rq;
        unsigned long flags;
        int error = 0, rtn, val;
+       unsigned int msecs_to_wait = MSECS_TO_WAIT_FOR_IO_ON_RESET;
 
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
                return -EACCES;
@@ -2301,6 +2310,22 @@ void scsi_report_device_reset(struct Scsi_Host *shost, 
int channel, int target)
 
        spin_lock_irqsave(shost->host_lock, flags);
        shost->tmf_in_progress = 1;
+
+       /* if any IOs in progress wait for them a while */
+       while ((atomic_read(&shost->host_busy) > 0) && (msecs_to_wait > 0)) {
+               spin_unlock_irqrestore(shost->host_lock, flags);
+               msleep(MSECS_PER_TRY_FOR_IO_ON_RESET);
+               msecs_to_wait -= MSECS_PER_TRY_FOR_IO_ON_RESET;
+               spin_lock_irqsave(shost->host_lock, flags);
+       }
+       if (atomic_read(&shost->host_busy)) {
+               shost->tmf_in_progress = 0;
+               spin_unlock_irqrestore(shost->host_lock, flags);
+               SCSI_LOG_ERROR_RECOVERY(3,
+                   printk("%s: device reset failed: outstanding IO\n", 
__func__));
+               goto out_put_scmd_and_free;
+       }
+
        spin_unlock_irqrestore(shost->host_lock, flags);
 
        switch (val & ~SG_SCSI_RESET_NO_ESCALATE) {
@@ -2349,6 +2374,7 @@ void scsi_report_device_reset(struct Scsi_Host *shost, 
int channel, int target)
        wake_up(&shost->host_wait);
        scsi_run_host_queues(shost);
 
+out_put_scmd_and_free:
        scsi_put_command(scmd);
        kfree(rq);
 
-- 
1.8.5.6

Reply via email to