[PATCH 4/4] scsi: ufs: Improve UFS fatal error handling

2013-08-14 Thread Santosh Y
From: Sujit Reddy Thumma 

Error handling in UFS driver is broken and resets the host controller
for fatal errors without re-initialization. Correct the fatal error
handling sequence according to UFS Host Controller Interface (HCI)
v1.1 specification.

o Processed requests which are completed with or without error are
  reported to the SCSI layer, and any pending commands that are not
  started are aborted in the controller and re-queued into the SCSI
  mid-layer queue.

o Upon determining a fatal error condition the host controller may hang
  forever until a reset is applied. Block the SCSI layer from sending new
  requests and apply the reset in a separate error handling work.

o SCSI is informed about the expected Unit-Attention exception from the
  device for the immediate command after a reset so that the SCSI layer
  takes the necessary steps to establish communication with the device.

Signed-off-by: Sujit Reddy Thumma 
Reviewed-by: Yaniv Gardi 
Tested-by: Dolev Raviv 
Signed-off-by: Santosh Y 

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index dfa61be..a0f5ac2 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -79,6 +79,14 @@ enum {
UFSHCD_EH_IN_PROGRESS = (1 << 0),
 };
 
+/* UFSHCD UIC layer error flags */
+enum {
+   UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
+   UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
+   UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
+   UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
+};
+
 /* Interrupt configuration options */
 enum {
UFSHCD_INT_DISABLE,
@@ -101,6 +109,8 @@ enum {
 
 static void ufshcd_tmc_handler(struct ufs_hba *hba);
 static void ufshcd_async_scan(void *data, async_cookie_t cookie);
+static int ufshcd_reset_and_restore(struct ufs_hba *hba);
+static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
 
 /*
  * ufshcd_wait_for_register - wait for register value to change
@@ -1523,9 +1533,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba 
*hba)
goto out;
}
 
-   if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-   scsi_unblock_requests(hba->host);
-
 out:
return err;
 }
@@ -1651,66 +1658,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
 }
 
 /**
- * ufshcd_do_reset - reset the host controller
- * @hba: per adapter instance
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_do_reset(struct ufs_hba *hba)
-{
-   struct ufshcd_lrb *lrbp;
-   unsigned long flags;
-   int tag;
-
-   /* block commands from midlayer */
-   scsi_block_requests(hba->host);
-
-   spin_lock_irqsave(hba->host->host_lock, flags);
-   hba->ufshcd_state = UFSHCD_STATE_RESET;
-
-   /* send controller to reset state */
-   ufshcd_hba_stop(hba);
-   spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-   /* abort outstanding commands */
-   for (tag = 0; tag < hba->nutrs; tag++) {
-   if (test_bit(tag, &hba->outstanding_reqs)) {
-   lrbp = &hba->lrb[tag];
-   if (lrbp->cmd) {
-   scsi_dma_unmap(lrbp->cmd);
-   lrbp->cmd->result = DID_RESET << 16;
-   lrbp->cmd->scsi_done(lrbp->cmd);
-   lrbp->cmd = NULL;
-   clear_bit_unlock(tag, &hba->lrb_in_use);
-   }
-   }
-   }
-
-   /* complete device management command */
-   if (hba->dev_cmd.complete)
-   complete(hba->dev_cmd.complete);
-
-   /* clear outstanding request/task bit maps */
-   hba->outstanding_reqs = 0;
-   hba->outstanding_tasks = 0;
-
-   /* Host controller enable */
-   if (ufshcd_hba_enable(hba)) {
-   dev_err(hba->dev,
-   "Reset: Controller initialization failed\n");
-   return FAILED;
-   }
-
-   if (ufshcd_link_startup(hba)) {
-   dev_err(hba->dev,
-   "Reset: Link start-up failed\n");
-   return FAILED;
-   }
-
-   return SUCCESS;
-}
-
-/**
  * ufshcd_slave_alloc - handle initial SCSI device configurations
  * @sdev: pointer to SCSI device
  *
@@ -1727,6 +1674,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
sdev->use_10_for_ms = 1;
scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
 
+   /* allow SCSI layer to restart the device in case of errors */
+   sdev->allow_restart = 1;
+
/*
 * Inform SCSI Midlayer that the LUN queue depth is same as the
 * controller queue depth. If a LUN queue depth is less than the
@@ -1930,6 +1880,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct 
ufshcd_lrb *lrbp)
case OCS_ABORTED:
result |= DID_ABORT << 16;
break;
+   case OCS_INVALID_COMMAND_STATUS:
+   result |= DID_REQUEUE << 16;
+   break;

[PATCH 4/4] scsi: ufs: Improve UFS fatal error handling

2013-06-13 Thread Sujit Reddy Thumma
Error handling in UFS driver is broken and resets the host controller
for fatal errors without re-initialization. Correct the fatal error
handling sequence according to UFS Host Controller Interface (HCI)
v1.1 specification.

o Upon determining fatal error condition the host controller may hang
  forever until a reset is applied, so just retrying the command doesn't
  work without a reset. So, the reset is applied in the driver context
  in a separate work and SCSI mid-layer isn't informed until reset is
  applied.

o Processed requests which are completed without error are reported to
  the SCSI layer as successful, and any pending commands that are not
  started yet or are not the cause of the error are re-queued into the
  SCSI mid-layer queue. For the command that caused the error, the host
  controller or device is reset and DID_ERROR is returned for command
  retry after applying the reset.

o SCSI is informed about the expected Unit-Attention exception from the
  device for the immediate command after a reset so that the SCSI layer
  takes the necessary steps to establish communication with the device.

Signed-off-by: Sujit Reddy Thumma 
---
 drivers/scsi/ufs/ufshcd.c |  348 +++--
 drivers/scsi/ufs/ufshcd.h |2 +
 drivers/scsi/ufs/ufshci.h |   19 ++-
 3 files changed, 293 insertions(+), 76 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index e368bb0..cca774e 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -84,6 +84,14 @@ enum {
UFSHCD_EH_DEVICE_RESET_PENDING = (1 << 1),
 };
 
+/* UFSHCD UIC layer error flags */
+enum {
+   UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
+   UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
+   UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
+   UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
+};
+
 /* Interrupt configuration options */
 enum {
UFSHCD_INT_DISABLE,
@@ -112,6 +120,7 @@ enum {
 
 static void ufshcd_tmc_handler(struct ufs_hba *hba);
 static void ufshcd_async_scan(void *data, async_cookie_t cookie);
+static int ufshcd_reset_and_restore(struct ufs_hba *hba);
 
 /*
  * ufshcd_wait_for_register - wait for register value to change
@@ -1570,9 +1579,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba 
*hba)
goto out;
}
 
-   if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-   scsi_unblock_requests(hba->host);
-
 out:
return err;
 }
@@ -1698,65 +1704,6 @@ static int ufshcd_validate_dev_connection(struct ufs_hba 
*hba)
 }
 
 /**
- * ufshcd_do_reset - reset the host controller
- * @hba: per adapter instance
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_do_reset(struct ufs_hba *hba)
-{
-   struct ufshcd_lrb *lrbp;
-   unsigned long flags;
-   int tag;
-
-   /* block commands from midlayer */
-   scsi_block_requests(hba->host);
-
-   spin_lock_irqsave(hba->host->host_lock, flags);
-   hba->ufshcd_state = UFSHCD_STATE_RESET;
-
-   /* send controller to reset state */
-   ufshcd_hba_stop(hba);
-   spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-   /* abort outstanding commands */
-   for (tag = 0; tag < SCSI_CMD_QUEUE_SIZE; tag++) {
-   if (test_bit(tag, &hba->outstanding_reqs)) {
-   lrbp = &hba->lrb[tag];
-   if (lrbp->cmd) {
-   scsi_dma_unmap(lrbp->cmd);
-   lrbp->cmd->result = DID_RESET << 16;
-   lrbp->cmd->scsi_done(lrbp->cmd);
-   lrbp->cmd = NULL;
-   }
-   }
-   }
-
-   /* complete internal command */
-   if (hba->i_cmd.dev_cmd_complete)
-   complete(hba->i_cmd.dev_cmd_complete);
-
-   /* clear outstanding request/task bit maps */
-   hba->outstanding_reqs = 0;
-   hba->outstanding_tasks = 0;
-
-   /* Host controller enable */
-   if (ufshcd_hba_enable(hba)) {
-   dev_err(hba->dev,
-   "Reset: Controller initialization failed\n");
-   return FAILED;
-   }
-
-   if (ufshcd_link_startup(hba)) {
-   dev_err(hba->dev,
-   "Reset: Link start-up failed\n");
-   return FAILED;
-   }
-
-   return SUCCESS;
-}
-
-/**
  * ufshcd_slave_alloc - handle initial SCSI device configurations
  * @sdev: pointer to SCSI device
  *
@@ -1773,6 +1720,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
sdev->use_10_for_ms = 1;
scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
 
+   /* allow SCSI layer to restart the device in case of errors */
+   sdev->allow_restart = 1;
+
/*
 * Inform SCSI Midlayer that the LUN queue depth is same as the
 * controller queue depth. If a LUN queue depth is less than the
@@ -1974,6 +1924,9 @@ ufshcd_transfer_r