Re: [PATCH 18/23] scsi_dh_alua: Send TEST UNIT READY to poll for transitioning

2016-02-11 Thread Hannes Reinecke
On 02/11/2016 09:25 PM, Ewan Milne wrote:
> On Mon, 2016-02-08 at 15:34 +0100, Hannes Reinecke wrote:
>> Sending a 'REPORT TARGET PORT GROUP' command is a costly operation,
>> as the array has to gather information about all ports.
>> So instead of using RTPG to poll for a status update when a port
>> is in transitioning we should be sending a TEST UNIT READY, and
>> wait for the sense code to report success.
> 
> Note that we may need to add a timeout on this somehow, I have
> recently seen a bug report where an array stayed in the ALUA
> transitioning state for an extremely long period of time.
> 
> That problem would occur with either the current or this new
> ALUA code, the question is whether we want to handle it better.
> 
There already is provisioning in the code to set the port to STANDBY
if the transitioning time is exceeded.
Not that this code path is well tested, but it's there :-)

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 18/23] scsi_dh_alua: Send TEST UNIT READY to poll for transitioning

2016-02-11 Thread Ewan Milne
On Mon, 2016-02-08 at 15:34 +0100, Hannes Reinecke wrote:
> Sending a 'REPORT TARGET PORT GROUP' command is a costly operation,
> as the array has to gather information about all ports.
> So instead of using RTPG to poll for a status update when a port
> is in transitioning we should be sending a TEST UNIT READY, and
> wait for the sense code to report success.

Note that we may need to add a timeout on this somehow, I have
recently seen a bug report where an array stayed in the ALUA
transitioning state for an extremely long period of time.

That problem would occur with either the current or this new
ALUA code, the question is whether we want to handle it better.

-Ewan

> 
> Signed-off-by: Hannes Reinecke 
> Reviewed-by: Ewan Milne 
> Reviewed-by: Christoph Hellwig 
> ---
>  drivers/scsi/device_handler/scsi_dh_alua.c | 38 
> ++
>  1 file changed, 38 insertions(+)
> 
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c 
> b/drivers/scsi/device_handler/scsi_dh_alua.c
> index de8e79e..a1db82f 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -466,6 +466,30 @@ static int alua_check_sense(struct scsi_device *sdev,
>  }
>  
>  /*
> + * alua_tur - Send a TEST UNIT READY
> + * @sdev: device to which the TEST UNIT READY command should be send
> + *
> + * Send a TEST UNIT READY to @sdev to figure out the device state
> + * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
> + * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
> + */
> +static int alua_tur(struct scsi_device *sdev)
> +{
> + struct scsi_sense_hdr sense_hdr;
> + int retval;
> +
> + retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
> +   ALUA_FAILOVER_RETRIES, &sense_hdr);
> + if (sense_hdr.sense_key == NOT_READY &&
> + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
> + return SCSI_DH_RETRY;
> + else if (retval)
> + return SCSI_DH_IO;
> + else
> + return SCSI_DH_OK;
> +}
> +
> +/*
>   * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
>   * @sdev: the device to be evaluated.
>   *
> @@ -735,8 +759,22 @@ static void alua_rtpg_work(struct work_struct *work)
>   alua_wq = kaluad_sync_wq;
>   pg->flags |= ALUA_PG_RUNNING;
>   if (pg->flags & ALUA_PG_RUN_RTPG) {
> + int state = pg->state;
> +
>   pg->flags &= ~ALUA_PG_RUN_RTPG;
>   spin_unlock_irqrestore(&pg->lock, flags);
> + if (state == TPGS_STATE_TRANSITIONING) {
> + if (alua_tur(sdev) == SCSI_DH_RETRY) {
> + spin_lock_irqsave(&pg->lock, flags);
> + pg->flags &= ~ALUA_PG_RUNNING;
> + pg->flags |= ALUA_PG_RUN_RTPG;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + queue_delayed_work(alua_wq, &pg->rtpg_work,
> +pg->interval * HZ);
> + return;
> + }
> + /* Send RTPG on failure or if TUR indicates SUCCESS */
> + }
>   err = alua_rtpg(sdev, pg);
>   spin_lock_irqsave(&pg->lock, flags);
>   if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/23] scsi_dh_alua: Send TEST UNIT READY to poll for transitioning

2016-02-08 Thread Hannes Reinecke
Sending a 'REPORT TARGET PORT GROUP' command is a costly operation,
as the array has to gather information about all ports.
So instead of using RTPG to poll for a status update when a port
is in transitioning we should be sending a TEST UNIT READY, and
wait for the sense code to report success.

Signed-off-by: Hannes Reinecke 
Reviewed-by: Ewan Milne 
Reviewed-by: Christoph Hellwig 
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index de8e79e..a1db82f 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -466,6 +466,30 @@ static int alua_check_sense(struct scsi_device *sdev,
 }
 
 /*
+ * alua_tur - Send a TEST UNIT READY
+ * @sdev: device to which the TEST UNIT READY command should be send
+ *
+ * Send a TEST UNIT READY to @sdev to figure out the device state
+ * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
+ * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
+ */
+static int alua_tur(struct scsi_device *sdev)
+{
+   struct scsi_sense_hdr sense_hdr;
+   int retval;
+
+   retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
+ ALUA_FAILOVER_RETRIES, &sense_hdr);
+   if (sense_hdr.sense_key == NOT_READY &&
+   sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
+   return SCSI_DH_RETRY;
+   else if (retval)
+   return SCSI_DH_IO;
+   else
+   return SCSI_DH_OK;
+}
+
+/*
  * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
  * @sdev: the device to be evaluated.
  *
@@ -735,8 +759,22 @@ static void alua_rtpg_work(struct work_struct *work)
alua_wq = kaluad_sync_wq;
pg->flags |= ALUA_PG_RUNNING;
if (pg->flags & ALUA_PG_RUN_RTPG) {
+   int state = pg->state;
+
pg->flags &= ~ALUA_PG_RUN_RTPG;
spin_unlock_irqrestore(&pg->lock, flags);
+   if (state == TPGS_STATE_TRANSITIONING) {
+   if (alua_tur(sdev) == SCSI_DH_RETRY) {
+   spin_lock_irqsave(&pg->lock, flags);
+   pg->flags &= ~ALUA_PG_RUNNING;
+   pg->flags |= ALUA_PG_RUN_RTPG;
+   spin_unlock_irqrestore(&pg->lock, flags);
+   queue_delayed_work(alua_wq, &pg->rtpg_work,
+  pg->interval * HZ);
+   return;
+   }
+   /* Send RTPG on failure or if TUR indicates SUCCESS */
+   }
err = alua_rtpg(sdev, pg);
spin_lock_irqsave(&pg->lock, flags);
if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
-- 
1.8.5.6

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html