On Tue, 2017-09-05 at 15:54 +0300, Pavel Tikhomirov wrote:
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index f6097b89d5d3..6c99221d60aa 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -320,12 +320,11 @@ void scsi_device_unbusy(struct scsi_device *sdev)
>       if (starget->can_queue > 0)
>               atomic_dec(&starget->target_busy);
>  
> +     spin_lock_irqsave(shost->host_lock, flags);
>       if (unlikely(scsi_host_in_recovery(shost) &&
> -                  (shost->host_failed || shost->host_eh_scheduled))) {
> -             spin_lock_irqsave(shost->host_lock, flags);
> +                  (shost->host_failed || shost->host_eh_scheduled)))
>               scsi_eh_wakeup(shost);
> -             spin_unlock_irqrestore(shost->host_lock, flags);
> -     }
> +     spin_unlock_irqrestore(shost->host_lock, flags);
>  
>       atomic_dec(&sdev->device_busy);
>  }
> @@ -1503,6 +1502,13 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
>       spin_unlock_irq(shost->host_lock);
>  out_dec:
>       atomic_dec(&shost->host_busy);
> +
> +     spin_lock_irq(shost->host_lock);
> +     if (unlikely(scsi_host_in_recovery(shost) &&
> +                  (shost->host_failed || shost->host_eh_scheduled)))
> +             scsi_eh_wakeup(shost);
> +     spin_unlock_irq(shost->host_lock);
> +
>       return 0;
>  }
>  
> @@ -1964,6 +1970,13 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  out_dec_host_busy:
>       atomic_dec(&shost->host_busy);
> +
> +     spin_lock_irq(shost->host_lock);
> +     if (unlikely(scsi_host_in_recovery(shost) &&
> +                  (shost->host_failed || shost->host_eh_scheduled)))
> +             scsi_eh_wakeup(shost);
> +     spin_unlock_irq(shost->host_lock);
> +
>  out_dec_target_busy:
>       if (scsi_target(sdev)->can_queue > 0)
>               atomic_dec(&scsi_target(sdev)->target_busy);

An important achievement of the scsi-mq code was the removal of all
spin_lock_irq(shost->host_lock) statements from the hot path. The above
changes will have a significant negative performance impact, especially if
multiple LUNs associated with the same SCSI host are involved. Can the
reported race be fixed without slowing down the hot path significantly? I
think that adding either spin lock or smp_mb() calls to the hot path will
have a significant negative performance impact.

Thanks,

Bart.

Reply via email to