RE: [PATCH V5 2/4] scsi: ufs: Fix hardware race conditions while aborting a command

2013-08-13 Thread Yaniv Gardi
Reviewed-by: Yaniv Gardi 

QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


= > -Original Message-
= > From: linux-scsi-ow...@vger.kernel.org [mailto:linux-scsi-
= > ow...@vger.kernel.org] On Behalf Of Dolev Raviv
= > Sent: Monday, August 12, 2013 4:02 PM
= > To: Sujit Reddy Thumma
= > Cc: Vinayak Holikatti; Santosh Y; James E.J. Bottomley; linux-
= > s...@vger.kernel.org; Sujit Reddy Thumma; linux-arm-
= > m...@vger.kernel.org
= > Subject: Re: [PATCH V5 2/4] scsi: ufs: Fix hardware race conditions
while
= > aborting a command
= > 
= > Tested-by: Dolev Raviv 
= > 
= > > There is a possible race condition in the hardware when the abort
= > > command is issued to terminate the ongoing SCSI command as described
= > > below:
= > >
= > > - A bit in the door-bell register is set in the controller for a
= > >   new SCSI command.
= > > - In some rare situations, before controller get a chance to issue
= > >   the command to the device, the software issued an abort command.
= > > - If the device recieves abort command first then it returns success
= > >   because the command itself is not present.
= > > - Now if the controller commits the command to device it will be
= > >   processed.
= > > - Software thinks that command is aborted and proceed while still
= > >   the device is processing it.
= > > - The software, controller and device may go out of sync because of
= > >   this race condition.
= > >
= > > To avoid this, query task presence in the device before sending abort
= > > task command so that after the abort operation, the command is
= > > guaranteed to be non-existent in both controller and the device.
= > >
= > > Signed-off-by: Sujit Reddy Thumma 
= > > ---
= > >  drivers/scsi/ufs/ufshcd.c |   70
= > > +++-
= > >  1 files changed, 55 insertions(+), 15 deletions(-)
= > >
= > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
= > > index d7f3746..d4ee48d 100644
= > > --- a/drivers/scsi/ufs/ufshcd.c
= > > +++ b/drivers/scsi/ufs/ufshcd.c
= > > @@ -2485,6 +2485,12 @@ static int ufshcd_host_reset(struct scsi_cmnd
= > *cmd)
= > >   * ufshcd_abort - abort a specific command
= > >   * @cmd: SCSI command pointer
= > >   *
= > > + * Abort the pending command in device by sending UFS_ABORT_TASK
= > task
= > > management
= > > + * command, and in host controller by clearing the door-bell
register.
= > > There can
= > > + * be race between controller sending the command to the device while
= > > abort is
= > > + * issued. To avoid that, first issue UFS_QUERY_TASK to check if the
= > > command is
= > > + * really issued and then try to abort it.
= > > + *
= > >   * Returns SUCCESS/FAILED
= > >   */
= > >  static int ufshcd_abort(struct scsi_cmnd *cmd) @@ -2493,7 +2499,8
= > @@
= > > static int ufshcd_abort(struct scsi_cmnd *cmd)
= > >   struct ufs_hba *hba;
= > >   unsigned long flags;
= > >   unsigned int tag;
= > > - int err;
= > > + int err = 0;
= > > + int poll_cnt;
= > >   u8 resp = 0xF;
= > >   struct ufshcd_lrb *lrbp;
= > >
= > > @@ -2501,33 +2508,59 @@ static int ufshcd_abort(struct scsi_cmnd
= > *cmd)
= > >   hba = shost_priv(host);
= > >   tag = cmd->request->tag;
= > >
= > > - spin_lock_irqsave(host->host_lock, flags);
= > > + /* If command is already aborted/completed, return SUCCESS */
= > > + if (!(test_bit(tag, &hba->outstanding_reqs)))
= > > + goto out;
= > >
= > > - /* check if command is still pending */
= > > - if (!(test_bit(tag, &hba->outstanding_reqs))) {
= > > - err = FAILED;
= > > - spin_unlock_irqrestore(host->host_lock, flags);
= > > + lrbp = &hba->lrb[tag];
= > > + for (poll_cnt = 100; poll_cnt; poll_cnt--) {
= > > + err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
= > > + UFS_QUERY_TASK, &resp);
= > > + if (!err && resp ==
= > UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
= > > + /* cmd pending in the device */
= > > + break;
= > > + } else if (!err && resp ==
= > UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
= > > + u32 reg;
= > > +
= > > + /*
= > > +  * cmd not pending in the device, check if it is
= > > +  * in transition.
= > > +  */
= > > +   

Re: [PATCH V5 2/4] scsi: ufs: Fix hardware race conditions while aborting a command

2013-08-12 Thread Dolev Raviv
Tested-by: Dolev Raviv 

> There is a possible race condition in the hardware when the abort
> command is issued to terminate the ongoing SCSI command as described
> below:
>
> - A bit in the door-bell register is set in the controller for a
>   new SCSI command.
> - In some rare situations, before controller get a chance to issue
>   the command to the device, the software issued an abort command.
> - If the device recieves abort command first then it returns success
>   because the command itself is not present.
> - Now if the controller commits the command to device it will be
>   processed.
> - Software thinks that command is aborted and proceed while still
>   the device is processing it.
> - The software, controller and device may go out of sync because of
>   this race condition.
>
> To avoid this, query task presence in the device before sending abort
> task command so that after the abort operation, the command is guaranteed
> to be non-existent in both controller and the device.
>
> Signed-off-by: Sujit Reddy Thumma 
> ---
>  drivers/scsi/ufs/ufshcd.c |   70
> +++-
>  1 files changed, 55 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index d7f3746..d4ee48d 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2485,6 +2485,12 @@ static int ufshcd_host_reset(struct scsi_cmnd *cmd)
>   * ufshcd_abort - abort a specific command
>   * @cmd: SCSI command pointer
>   *
> + * Abort the pending command in device by sending UFS_ABORT_TASK task
> management
> + * command, and in host controller by clearing the door-bell register.
> There can
> + * be race between controller sending the command to the device while
> abort is
> + * issued. To avoid that, first issue UFS_QUERY_TASK to check if the
> command is
> + * really issued and then try to abort it.
> + *
>   * Returns SUCCESS/FAILED
>   */
>  static int ufshcd_abort(struct scsi_cmnd *cmd)
> @@ -2493,7 +2499,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>   struct ufs_hba *hba;
>   unsigned long flags;
>   unsigned int tag;
> - int err;
> + int err = 0;
> + int poll_cnt;
>   u8 resp = 0xF;
>   struct ufshcd_lrb *lrbp;
>
> @@ -2501,33 +2508,59 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>   hba = shost_priv(host);
>   tag = cmd->request->tag;
>
> - spin_lock_irqsave(host->host_lock, flags);
> + /* If command is already aborted/completed, return SUCCESS */
> + if (!(test_bit(tag, &hba->outstanding_reqs)))
> + goto out;
>
> - /* check if command is still pending */
> - if (!(test_bit(tag, &hba->outstanding_reqs))) {
> - err = FAILED;
> - spin_unlock_irqrestore(host->host_lock, flags);
> + lrbp = &hba->lrb[tag];
> + for (poll_cnt = 100; poll_cnt; poll_cnt--) {
> + err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
> + UFS_QUERY_TASK, &resp);
> + if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
> + /* cmd pending in the device */
> + break;
> + } else if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> + u32 reg;
> +
> + /*
> +  * cmd not pending in the device, check if it is
> +  * in transition.
> +  */
> + reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> + if (reg & (1 << tag)) {
> + /* sleep for max. 2ms to stabilize */
> + usleep_range(1000, 2000);
> + continue;
> + }
> + /* command completed already */
> + goto out;
> + } else {
> + if (!err)
> + err = resp; /* service response error */
> + goto out;
> + }
> + }
> +
> + if (!poll_cnt) {
> + err = -EBUSY;
>   goto out;
>   }
> - spin_unlock_irqrestore(host->host_lock, flags);
>
> - lrbp = &hba->lrb[tag];
>   err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
>   UFS_ABORT_TASK, &resp);
>   if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> - err = FAILED;
> + if (!err)
> + err = resp; /* service response error */
>   goto out;
> - } else {
> - err = SUCCESS;
>   }
>
> + err = ufshcd_clear_cmd(hba, tag);
> + if (err)
> + goto out;
> +
>   scsi_dma_unmap(cmd);
>
>   spin_lock_irqsave(host->host_lock, flags);
> -
> - /* clear the respective UTRLCLR register bit */
> - ufshcd_utrl_clear(hba, tag);
> -
>   __clear_bit(tag, &hba->outstanding_reqs);
>   hba->lrb[tag].cmd =