Re: [PATCH] scsi: ufs: Make use of UFS_BIT macro wherever possible

2017-09-29 Thread Alim Akhtar
Hi Martin,

On 09/26/2017 01:01 AM, Martin K. Petersen wrote:
> 
> Alim,
> 
>> Should I drop this patch and send another one which removes UFS_BIT()
>> macro?
> 
> I fail to see the point of UFS_BIT(). So yes.
> 
> Please make sure to CC: Subhash on ufs changes.
> 
Thanks for looking into this patch.
I have just posted https://lkml.org/lkml/2017/9/30/41 (a series of 3 
patches)
Please take a look.

Regards,
Alim




[PATCH 1/3] scsi: ufs: Change HCI macro to actual bit position

2017-09-29 Thread Alim Akhtar
Currently the UFS HCI code uses the UFS_BIT() macro to express the bit
positions of hardware register status bits. The macro makes the code
longer rather than shorter and does not improve readability either.
Let's rewrite these macro definitions with the actual bit values.
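
For illustration, UFS_BIT(x) merely expands to (1L << (x)), so every
definition touched below is replaced by a numerically identical
constant. A minimal stand-alone check (not driver code, illustrative
values only):

#include <assert.h>

#define UFS_BIT(x)      (1L << (x))     /* the helper being phased out */

int main(void)
{
        /* e.g. the IS register bits converted in ufshci.h below */
        assert(UFS_BIT(0) == 0x1);      /* UTP_TRANSFER_REQ_COMPL */
        assert(UFS_BIT(4) == 0x10);     /* UIC_POWER_MODE         */
        assert(UFS_BIT(8) == 0x100);    /* UIC_LINK_STARTUP       */
        return 0;
}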

Suggested-by: Bart Van Assche 
Signed-off-by: Alim Akhtar 
---
 This patch is only compile tested; testing on actual h/w is appreciated.
 Please see https://lkml.org/lkml/2017/8/28/786 for the discussion.
 
 drivers/scsi/ufs/ufshcd.h | 14 +-
 drivers/scsi/ufs/ufshci.h | 69 ---
 2 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index cdc8bd0..ce2920b 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -544,13 +544,13 @@ struct ufs_hba {
bool is_irq_enabled;
 
/* Interrupt aggregation support is broken */
-   #define UFSHCD_QUIRK_BROKEN_INTR_AGGR   UFS_BIT(0)
+   #define UFSHCD_QUIRK_BROKEN_INTR_AGGR   0x1
 
/*
 * delay before each dme command is required as the unipro
 * layer has shown instabilities
 */
-   #define UFSHCD_QUIRK_DELAY_BEFORE_DME_CMDS  UFS_BIT(1)
+   #define UFSHCD_QUIRK_DELAY_BEFORE_DME_CMDS  0x2
 
/*
 * If UFS host controller is having issue in processing LCC (Line
@@ -559,21 +559,21 @@ struct ufs_hba {
 * the LCC transmission on UFS device (by clearing TX_LCC_ENABLE
 * attribute of device to 0).
 */
-   #define UFSHCD_QUIRK_BROKEN_LCC UFS_BIT(2)
+   #define UFSHCD_QUIRK_BROKEN_LCC 0x4
 
/*
 * The attribute PA_RXHSUNTERMCAP specifies whether or not the
 * inbound Link supports unterminated line in HS mode. Setting this
 * attribute to 1 fixes moving to HS gear.
 */
-   #define UFSHCD_QUIRK_BROKEN_PA_RXHSUNTERMCAPUFS_BIT(3)
+   #define UFSHCD_QUIRK_BROKEN_PA_RXHSUNTERMCAP0x8
 
/*
 * This quirk needs to be enabled if the host contoller only allows
 * accessing the peer dme attributes in AUTO mode (FAST AUTO or
 * SLOW AUTO).
 */
-   #define UFSHCD_QUIRK_DME_PEER_ACCESS_AUTO_MODE  UFS_BIT(4)
+   #define UFSHCD_QUIRK_DME_PEER_ACCESS_AUTO_MODE  0x10
 
/*
 * This quirk needs to be enabled if the host contoller doesn't
@@ -581,13 +581,13 @@ struct ufs_hba {
 * is enabled, standard UFS host driver will call the vendor specific
 * ops (get_ufs_hci_version) to get the correct version.
 */
-   #define UFSHCD_QUIRK_BROKEN_UFS_HCI_VERSION UFS_BIT(5)
+   #define UFSHCD_QUIRK_BROKEN_UFS_HCI_VERSION 0x20
 
/*
 * This quirk needs to be enabled if the host contoller regards
 * resolution of the values of PRDTO and PRDTL in UTRD as byte.
 */
-   #define UFSHCD_QUIRK_PRDT_BYTE_GRAN UFS_BIT(7)
+   #define UFSHCD_QUIRK_PRDT_BYTE_GRAN 0x80
 
unsigned int quirks;/* Deviations from standard UFSHCI spec. */
 
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index f60145d..5a60a8f 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -119,22 +119,24 @@ enum {
 #define MANUFACTURE_ID_MASKUFS_MASK(0x, 0)
 #define PRODUCT_ID_MASKUFS_MASK(0x, 16)
 
-#define UFS_BIT(x) (1L << (x))
-
-#define UTP_TRANSFER_REQ_COMPL UFS_BIT(0)
-#define UIC_DME_END_PT_RESET   UFS_BIT(1)
-#define UIC_ERROR  UFS_BIT(2)
-#define UIC_TEST_MODE  UFS_BIT(3)
-#define UIC_POWER_MODE UFS_BIT(4)
-#define UIC_HIBERNATE_EXIT UFS_BIT(5)
-#define UIC_HIBERNATE_ENTERUFS_BIT(6)
-#define UIC_LINK_LOST  UFS_BIT(7)
-#define UIC_LINK_STARTUP   UFS_BIT(8)
-#define UTP_TASK_REQ_COMPL UFS_BIT(9)
-#define UIC_COMMAND_COMPL  UFS_BIT(10)
-#define DEVICE_FATAL_ERROR UFS_BIT(11)
-#define CONTROLLER_FATAL_ERROR UFS_BIT(16)
-#define SYSTEM_BUS_FATAL_ERROR UFS_BIT(17)
+#define UFS_BIT(x) (1L << (x))
+/*
+ * IS - Interrupt Status - 20h
+ */
+#define UTP_TRANSFER_REQ_COMPL 0x1
+#define UIC_DME_END_PT_RESET   0x2
+#define UIC_ERROR  0x4
+#define UIC_TEST_MODE  0x8
+#define UIC_POWER_MODE 0x10
+#define UIC_HIBERNATE_EXIT 0x20
+#define UIC_HIBERNATE_ENTER0x40
+#define UIC_LINK_LOST  0x80
+#define UIC_LINK_STARTUP   0x100
+#define UT

[PATCH 3/3] scsi: ufs: Remove unused UFS_BIT() macro

2017-09-29 Thread Alim Akhtar
Since we have converted all users of the UFS_BIT() macro to the
actual bit positions, let's remove the now unused UFS_BIT() macro.

Signed-off-by: Alim Akhtar 
---
 drivers/scsi/ufs/ufshci.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 5a60a8f..c51fa6d 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -119,7 +119,6 @@ enum {
 #define MANUFACTURE_ID_MASKUFS_MASK(0x, 0)
 #define PRODUCT_ID_MASKUFS_MASK(0x, 16)
 
-#define UFS_BIT(x) (1L << (x))
 /*
  * IS - Interrupt Status - 20h
  */
-- 
2.7.4



[PATCH 2/3] scsi: ufs-qcom: Remove uses of UFS_BIT() macro

2017-09-29 Thread Alim Akhtar
Use the actual bit positions instead of the UFS_BIT() macro. This
patch also changes bit 17 to a meaningful #define.

This change is as per discussion here [1]
[1] -> https://lkml.org/lkml/2017/8/28/786

Signed-off-by: Alim Akhtar 
Cc: Subhash Jadavani 
---

 This patch is only compile tested; testing on actual h/w is appreciated.

 drivers/scsi/ufs/ufs-qcom.c | 4 ++--
 drivers/scsi/ufs/ufs-qcom.h | 7 ---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index c87d770..6a548e7 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -1458,7 +1458,7 @@ static void ufs_qcom_print_hw_debug_reg_all(struct 
ufs_hba *hba,
print_fn(hba, reg, 44, "UFS_UFS_DBG_RD_REG_OCSC ", priv);
 
reg = ufshcd_readl(hba, REG_UFS_CFG1);
-   reg |= UFS_BIT(17);
+   reg |= UTP_DBG_RAMS_EN;
ufshcd_writel(hba, reg, REG_UFS_CFG1);
 
reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_EDTL_RAM);
@@ -1471,7 +1471,7 @@ static void ufs_qcom_print_hw_debug_reg_all(struct 
ufs_hba *hba,
print_fn(hba, reg, 64, "UFS_UFS_DBG_RD_PRDT_RAM ", priv);
 
/* clear bit 17 - UTP_DBG_RAMS_EN */
-   ufshcd_rmwl(hba, UFS_BIT(17), 0, REG_UFS_CFG1);
+   ufshcd_rmwl(hba, UTP_DBG_RAMS_EN, 0, REG_UFS_CFG1);
 
reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_UAWM);
print_fn(hba, reg, 4, "UFS_DBG_RD_REG_UAWM ", priv);
diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h
index 076f528..295f4be 100644
--- a/drivers/scsi/ufs/ufs-qcom.h
+++ b/drivers/scsi/ufs/ufs-qcom.h
@@ -92,7 +92,8 @@ enum {
 #define UFS_CNTLR_3_x_x_VEN_REGS_OFFSET(x) (0x400 + x)
 
 /* bit definitions for REG_UFS_CFG1 register */
-#define QUNIPRO_SELUFS_BIT(0)
+#define QUNIPRO_SEL0x1
+#define UTP_DBG_RAMS_EN0x2
 #define TEST_BUS_ENBIT(18)
 #define TEST_BUS_SEL   GENMASK(22, 19)
 #define UFS_REG_TEST_BUS_ENBIT(30)
@@ -213,13 +214,13 @@ struct ufs_qcom_host {
 * Note: By default this capability will be kept enabled if host
 * controller supports the QUniPro mode.
 */
-   #define UFS_QCOM_CAP_QUNIPROUFS_BIT(0)
+   #define UFS_QCOM_CAP_QUNIPRO0x1
 
/*
 * Set this capability if host controller can retain the secure
 * configuration even after UFS controller core power collapse.
 */
-   #define UFS_QCOM_CAP_RETAIN_SEC_CFG_AFTER_PWR_COLLAPSE  UFS_BIT(1)
+   #define UFS_QCOM_CAP_RETAIN_SEC_CFG_AFTER_PWR_COLLAPSE  0x2
u32 caps;
 
struct phy *generic_phy;
-- 
2.7.4



[PATCH V7 1/6] blk-mq: only run hw queues for blk-mq

2017-09-29 Thread Ming Lei
This patch just makes it explicit: blk_freeze_queue_start() should run
the hardware queues only when the queue is blk-mq (q->mq_ops is set).

Tested-by: Oleksandr Natalenko 
Tested-by: Martin Steigerwald 
Reviewed-by: Johannes Thumshirn 
Cc: Bart Van Assche 
Signed-off-by: Ming Lei 
---
 block/blk-mq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 98a18609755e..6fd9f86fc86d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -125,7 +125,8 @@ void blk_freeze_queue_start(struct request_queue *q)
freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
if (freeze_depth == 1) {
percpu_ref_kill(&q->q_usage_counter);
-   blk_mq_run_hw_queues(q, false);
+   if (q->mq_ops)
+   blk_mq_run_hw_queues(q, false);
}
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
-- 
2.9.5



[PATCH V7 4/6] block: prepare for passing RQF_PREEMPT to request allocation

2017-09-29 Thread Ming Lei
RQF_PREEMPT is a bit special because such a request must still be
dispatched to the LLD even when the SCSI device is quiesced.

So this patch introduces __blk_get_request() and lets callers pass the
PREEMPT flag in, so that an RQF_PREEMPT request can still be allocated
while the queue is in PREEMPT ONLY mode, which is introduced in a
following patch.
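
The hunks below convert the callers to __blk_get_request(); the new
blkdev.h wrapper is not shown in the quoted context, but presumably
blk_get_request() stays a thin inline along these lines (a sketch, not
a quoted hunk):

static inline struct request *blk_get_request(struct request_queue *q,
                                              unsigned int op,
                                              gfp_t gfp_mask)
{
        /* no extra flags: existing callers keep their old behaviour */
        return __blk_get_request(q, op, gfp_mask, 0);
}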

Tested-by: Oleksandr Natalenko 
Tested-by: Martin Steigerwald 
Cc: Bart Van Assche 
Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 19 +--
 block/blk-mq.c |  3 +--
 include/linux/blk-mq.h |  7 ---
 include/linux/blkdev.h | 17 ++---
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 7d5040a6d5a4..95b1c5e50be3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1398,7 +1398,8 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
 }
 
 static struct request *blk_old_get_request(struct request_queue *q,
-  unsigned int op, gfp_t gfp_mask)
+  unsigned int op, gfp_t gfp_mask,
+  unsigned int flags)
 {
struct request *rq;
int ret = 0;
@@ -1408,8 +1409,7 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
 
-   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM) ?
-   BLK_REQ_NOWAIT : 0);
+   ret = blk_queue_enter(q, flags & BLK_REQ_BITS_MASK);
if (ret)
return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
@@ -1427,26 +1427,25 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-   gfp_t gfp_mask)
+struct request *__blk_get_request(struct request_queue *q, unsigned int op,
+ gfp_t gfp_mask, unsigned int flags)
 {
struct request *req;
 
+   flags |= gfp_mask & __GFP_DIRECT_RECLAIM ? 0 : BLK_REQ_NOWAIT;
if (q->mq_ops) {
-   req = blk_mq_alloc_request(q, op,
-   (gfp_mask & __GFP_DIRECT_RECLAIM) ?
-   0 : BLK_MQ_REQ_NOWAIT);
+   req = blk_mq_alloc_request(q, op, flags);
if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
q->mq_ops->initialize_rq_fn(req);
} else {
-   req = blk_old_get_request(q, op, gfp_mask);
+   req = blk_old_get_request(q, op, gfp_mask, flags);
if (!IS_ERR(req) && q->initialize_rq_fn)
q->initialize_rq_fn(req);
}
 
return req;
 }
-EXPORT_SYMBOL(blk_get_request);
+EXPORT_SYMBOL(__blk_get_request);
 
 /**
  * blk_requeue_request - put a request back on queue
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 45bff90e08f7..90b43f607e3c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,8 +384,7 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, unsigned int op,
struct request *rq;
int ret;
 
-   ret = blk_queue_enter(q, (flags & BLK_MQ_REQ_NOWAIT) ?
-   BLK_REQ_NOWAIT : 0);
+   ret = blk_queue_enter(q, flags & BLK_REQ_BITS_MASK);
if (ret)
return ERR_PTR(ret);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 50c6485cb04f..066a676d7749 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -197,9 +197,10 @@ void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 
 enum {
-   BLK_MQ_REQ_NOWAIT   = (1 << 0), /* return when out of requests */
-   BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
-   BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
+   BLK_MQ_REQ_NOWAIT   = BLK_REQ_NOWAIT, /* return when out of 
requests */
+   BLK_MQ_REQ_PREEMPT  = BLK_REQ_PREEMPT, /* allocate for RQF_PREEMPT 
*/
+   BLK_MQ_REQ_RESERVED = (1 << BLK_REQ_MQ_START_BIT), /* allocate from 
reserved pool */
+   BLK_MQ_REQ_INTERNAL = (1 << (BLK_REQ_MQ_START_BIT + 1)), /* 
allocate internal/sched tag */
 };
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 127f64c7012c..68445adc8765 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -860,7 +860,10 @@ enum {
 
 /* passed to blk_queue_enter */
 enum {
-   BLK_REQ_NOWAIT = (1 << 0),
+   BLK_REQ_NOWAIT  = (1 << 0),
+   BLK_REQ_PREEMPT = (1 << 1),
+   BLK_REQ_MQ_START_BIT= 2,
+   BLK_REQ_BITS_MASK   = (1U << BLK_REQ_MQ_START_BIT) - 1,
 };
 
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
@@ -945,8 +948,9 @@ extern vo

[PATCH V7 6/6] SCSI: set block queue at preempt only when SCSI device is put into quiesce

2017-09-29 Thread Ming Lei
Simply quiescing the SCSI device and waiting for completion of the I/O
already dispatched to the SCSI queue isn't safe: it is easy to use up
the request pool, because requests allocated beforehand can't be
dispatched while the device is in QUIESCE. Then no request can be
allocated for RQF_PREEMPT and the system may hang somewhere, such as
when sending sync_cache or start_stop commands in the system suspend
path.

Before quiescing SCSI, this patch first puts the block queue into
preempt-only mode, so no new normal request can enter the queue any
more, and all pending requests are drained once blk_set_preempt_only(true)
returns. RQF_PREEMPT requests can then be allocated successfully during
SCSI quiescing.

This patch fixes a long-standing I/O hang issue in both the block
legacy path and blk-mq.
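
Condensed, the ordering established by the hunks below is (error
handling, locking and the device_busy wait elided):

	/* scsi_device_quiesce(): block normal I/O, then change state */
	blk_set_preempt_only(sdev->request_queue, true);
	err = scsi_device_set_state(sdev, SDEV_QUIESCE);
	if (err)
		blk_set_preempt_only(sdev->request_queue, false);

	/* scsi_device_resume(): restore state, then lift preempt-only */
	scsi_device_set_state(sdev, SDEV_RUNNING);
	blk_set_preempt_only(sdev->request_queue, false);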

Tested-by: Oleksandr Natalenko 
Tested-by: Martin Steigerwald 
Cc: sta...@vger.kernel.org
Cc: Bart Van Assche 
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9cf6a80fe297..82c51619f1b7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -252,9 +252,10 @@ int scsi_execute(struct scsi_device *sdev, const unsigned 
char *cmd,
struct scsi_request *rq;
int ret = DRIVER_ERROR << 24;
 
-   req = blk_get_request(sdev->request_queue,
+   req = __blk_get_request(sdev->request_queue,
data_direction == DMA_TO_DEVICE ?
-   REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+   REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM,
+   BLK_REQ_PREEMPT);
if (IS_ERR(req))
return ret;
rq = scsi_req(req);
@@ -2928,12 +2929,28 @@ scsi_device_quiesce(struct scsi_device *sdev)
 {
int err;
 
+   /*
+* Simply quiescing SCSI device isn't safe, it is easy
+* to use up requests because all these allocated requests
+* can't be dispatched when device is put in QUIESCE.
+* Then no request can be allocated and we may hang
+* somewhere, such as system suspend/resume.
+*
+* So we set block queue in preempt only first, no new
+* normal request can enter queue any more, and all pending
+* requests are drained once blk_set_preempt_only()
+* returns. Only RQF_PREEMPT is allowed in preempt only mode.
+*/
+   blk_set_preempt_only(sdev->request_queue, true);
+
mutex_lock(&sdev->state_mutex);
err = scsi_device_set_state(sdev, SDEV_QUIESCE);
mutex_unlock(&sdev->state_mutex);
 
-   if (err)
+   if (err) {
+   blk_set_preempt_only(sdev->request_queue, false);
return err;
+   }
 
scsi_run_queue(sdev->request_queue);
while (atomic_read(&sdev->device_busy)) {
@@ -2964,6 +2981,8 @@ void scsi_device_resume(struct scsi_device *sdev)
scsi_device_set_state(sdev, SDEV_RUNNING) == 0)
scsi_run_queue(sdev->request_queue);
mutex_unlock(&sdev->state_mutex);
+
+   blk_set_preempt_only(sdev->request_queue, false);
 }
 EXPORT_SYMBOL(scsi_device_resume);
 
-- 
2.9.5



[PATCH V7 5/6] block: support PREEMPT_ONLY

2017-09-29 Thread Ming Lei
When the queue is in PREEMPT_ONLY mode, only RQF_PREEMPT requests
can be allocated and dispatched; other requests are not allowed to
enter the I/O path.

This is useful for supporting safe SCSI quiesce.

Part of this patch is from Bart's '[PATCH v4 4/7] block: Add the
QUEUE_FLAG_PREEMPT_ONLY request queue flag'.
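
The check added to blk_queue_enter() below boils down to a single
predicate; expressed as a stand-alone helper (hypothetical name, for
illustration only):

static inline bool blk_enter_allowed(unsigned int flags, bool preempt_only)
{
	/* normal requests are held off while the queue is preempt-only */
	return !preempt_only || (flags & BLK_REQ_PREEMPT);
}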

Tested-by: Oleksandr Natalenko 
Tested-by: Martin Steigerwald 
Cc: Bart Van Assche 
Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 26 --
 include/linux/blkdev.h |  5 +
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 95b1c5e50be3..bb683bfe37b2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -346,6 +346,17 @@ void blk_sync_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
+void blk_set_preempt_only(struct request_queue *q, bool preempt_only)
+{
+   blk_mq_freeze_queue(q);
+   if (preempt_only)
+   queue_flag_set_unlocked(QUEUE_FLAG_PREEMPT_ONLY, q);
+   else
+   queue_flag_clear_unlocked(QUEUE_FLAG_PREEMPT_ONLY, q);
+   blk_mq_unfreeze_queue(q);
+}
+EXPORT_SYMBOL(blk_set_preempt_only);
+
 /**
  * __blk_run_queue_uncond - run a queue whether or not it has been stopped
  * @q: The queue to run
@@ -771,9 +782,18 @@ int blk_queue_enter(struct request_queue *q, unsigned 
flags)
while (true) {
int ret;
 
+   /*
+* preempt_only flag has to be set after queue is frozen,
+* so it can be checked here lockless and safely
+*/
+   if (blk_queue_preempt_only(q)) {
+   if (!(flags & BLK_REQ_PREEMPT))
+   goto slow_path;
+   }
+
if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
-
+ slow_path:
if (flags & BLK_REQ_NOWAIT)
return -EBUSY;
 
@@ -787,7 +807,9 @@ int blk_queue_enter(struct request_queue *q, unsigned flags)
smp_rmb();
 
ret = wait_event_interruptible(q->mq_freeze_wq,
-   !atomic_read(&q->mq_freeze_depth) ||
+   (!atomic_read(&q->mq_freeze_depth) &&
+   ((flags & BLK_REQ_PREEMPT) ||
+!blk_queue_preempt_only(q))) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 68445adc8765..b01a0c6bb1f0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -631,6 +631,7 @@ struct request_queue {
 #define QUEUE_FLAG_REGISTERED  26  /* queue has been registered to a disk 
*/
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED28  /* queue has been quiesced */
+#define QUEUE_FLAG_PREEMPT_ONLY29  /* only process REQ_PREEMPT 
requests */
 
 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) |\
 (1 << QUEUE_FLAG_STACKABLE)|   \
@@ -735,6 +736,10 @@ static inline void queue_flag_clear(unsigned int flag, 
struct request_queue *q)
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 REQ_FAILFAST_DRIVER))
 #define blk_queue_quiesced(q)  test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
+#define blk_queue_preempt_only(q)  \
+   test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+
+extern void blk_set_preempt_only(struct request_queue *q, bool preempt_only);
 
 static inline bool blk_account_rq(struct request *rq)
 {
-- 
2.9.5



[PATCH V7 2/6] block: tracking request allocation with q_usage_counter

2017-09-29 Thread Ming Lei
This usage is basically the same as blk-mq, so that we can easily
support freezing the legacy queue.

Also 'wake_up_all(&q->mq_freeze_wq)' has to be moved into
blk_set_queue_dying(), since both legacy and blk-mq may wait on the
.mq_freeze_wq wait queue.
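
In caller terms, the invariant this introduces on the legacy path is
that every allocated request pins q->q_usage_counter until it is freed
(a condensed sketch of the hunks below, nowait argument simplified):

	ret = blk_queue_enter(q, false);	/* taken at allocation time */
	if (ret)
		return ERR_PTR(ret);
	rq = get_request(q, op, NULL, gfp_mask);
	if (IS_ERR(rq))
		blk_queue_exit(q);	/* dropped on allocation failure... */
	/* ...otherwise dropped by __blk_put_request() when rq is freed */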

Tested-by: Oleksandr Natalenko 
Tested-by: Martin Steigerwald 
Reviewed-by: Hannes Reinecke 
Cc: Bart Van Assche 
Signed-off-by: Ming Lei 
---
 block/blk-core.c | 14 ++
 block/blk-mq.c   |  7 ---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 048be4aa6024..a5011c824ac6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -610,6 +610,12 @@ void blk_set_queue_dying(struct request_queue *q)
}
spin_unlock_irq(q->queue_lock);
}
+
+   /*
+* We need to ensure that processes currently waiting on
+* the queue are notified as well.
+*/
+   wake_up_all(&q->mq_freeze_wq);
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
@@ -1395,16 +1401,21 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
   unsigned int op, gfp_t gfp_mask)
 {
struct request *rq;
+   int ret = 0;
 
WARN_ON_ONCE(q->mq_ops);
 
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
 
+   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM));
+   if (ret)
+   return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
rq = get_request(q, op, NULL, gfp_mask);
if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
+   blk_queue_exit(q);
return rq;
}
 
@@ -1576,6 +1587,7 @@ void __blk_put_request(struct request_queue *q, struct 
request *req)
blk_free_request(rl, req);
freed_request(rl, sync, rq_flags);
blk_put_rl(rl);
+   blk_queue_exit(q);
}
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1857,8 +1869,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, 
struct bio *bio)
 * Grab a free request. This is might sleep but can not fail.
 * Returns with the queue unlocked.
 */
+   blk_queue_enter_live(q);
req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
if (IS_ERR(req)) {
+   blk_queue_exit(q);
__wbt_done(q->rq_wb, wb_acct);
if (PTR_ERR(req) == -ENOMEM)
bio->bi_status = BLK_STS_RESOURCE;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6fd9f86fc86d..10c1f49f663d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -256,13 +256,6 @@ void blk_mq_wake_waiters(struct request_queue *q)
queue_for_each_hw_ctx(q, hctx, i)
if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_wakeup_all(hctx->tags, true);
-
-   /*
-* If we are called because the queue has now been marked as
-* dying, we need to ensure that processes currently waiting on
-* the queue are notified as well.
-*/
-   wake_up_all(&q->mq_freeze_wq);
 }
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
-- 
2.9.5



[PATCH V7 0/6] block/scsi: safe SCSI quiescing

2017-09-29 Thread Ming Lei
Hi Jens,

Please consider this patchset for v4.15; it fixes one kind of
long-standing I/O hang issue in both the block legacy path and
blk-mq.

The current SCSI quiesce isn't safe and can easily trigger I/O deadlock.

Once the SCSI device is put into QUIESCE, no new request except for
RQF_PREEMPT can be dispatched to SCSI successfully, and
scsi_device_quiesce() simply waits for completion of the I/Os already
dispatched to the SCSI stack. That isn't enough at all.

New requests can still be coming in, but none of the allocated
requests can be dispatched successfully, so the request pool can
easily be used up.

Then a request with RQF_PREEMPT can't be allocated and waits forever,
so the system hangs forever, for example during system suspend or when
sending SCSI domain validation in the transport_spi case.
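
An illustrative sketch of the pre-patch hang (REQ_OP_WRITE/GFP_NOIO
stand in for any normal submitter; not code from the patches):

	scsi_device_quiesce(sdev);  /* only RQF_PREEMPT may be dispatched now */

	/* normal submitters keep allocating requests that can never run,
	 * until the request pool / tags are exhausted: */
	rq = blk_get_request(sdev->request_queue, REQ_OP_WRITE, GFP_NOIO);

	/* the suspend path then needs a preempt request and waits forever: */
	rq = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, __GFP_RECLAIM);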

Both the I/O hang inside system suspend [1] and the one during SCSI
domain validation have been reported before.

This patchset introduces a preempt-only mode and solves the issue
by allowing only RQF_PREEMPT requests during SCSI quiesce.

Both SCSI and SCSI_MQ have this I/O deadlock issue; this patchset
fixes them both.

V7:
- add Reviewed-by & Tested-by
- one line change in patch 5 for checking preempt request

V6:
- borrow Bart's idea of preempt only, with a clean
  implementation (patch 5/patch 6)
- no dependency on external driver changes, such as MD's

V5:
- fix one tiny race by introducing blk_queue_enter_preempt_freeze();
  given this change is small enough compared with V4, I added
  tested-by directly

V4:
- reorganize patch order to make it more reasonable
- support nested preempt freeze, as required by SCSI transport spi
- check preempt freezing in the slow path of blk_queue_enter()
- add "SCSI: transport_spi: resume a quiesced device"
- wake up freeze queue in setting dying for both blk-mq and legacy
- rename blk_mq_[freeze|unfreeze]_queue() in one patch
- rename .mq_freeze_wq and .mq_freeze_depth
- improve comment

V3:
- introduce q->preempt_unfreezing to fix one bug of preempt freeze
- call blk_queue_enter_live() only when queue is preempt frozen
- cleanup a bit on the implementation of preempt freeze
- only patch 6 and 7 are changed

V2:
- drop the 1st patch in V1 because percpu_ref_is_dying() is
enough as pointed by Tejun
- introduce preempt version of blk_[freeze|unfreeze]_queue
- sync between preempt freeze and normal freeze
- fix warning from percpu-refcount as reported by Oleksandr


[1] https://marc.info/?t=150340250100013&r=3&w=2


Thanks,
Ming

Ming Lei (6):
  blk-mq: only run hw queues for blk-mq
  block: tracking request allocation with q_usage_counter
  block: pass flags to blk_queue_enter()
  block: prepare for passing RQF_PREEMPT to request allocation
  block: support PREEMPT_ONLY
  SCSI: set block queue at preempt only when SCSI device is put into
quiesce

 block/blk-core.c| 63 +++--
 block/blk-mq.c  | 14 ---
 block/blk-timeout.c |  2 +-
 drivers/scsi/scsi_lib.c | 25 +---
 fs/block_dev.c  |  4 ++--
 include/linux/blk-mq.h  |  7 +++---
 include/linux/blkdev.h  | 27 ++---
 7 files changed, 107 insertions(+), 35 deletions(-)

-- 
2.9.5



[PATCH V7 3/6] block: pass flags to blk_queue_enter()

2017-09-29 Thread Ming Lei
We need to pass a PREEMPT flag to blk_queue_enter() so that a
request with RQF_PREEMPT can be allocated in the following patch.

Tested-by: Oleksandr Natalenko 
Tested-by: Martin Steigerwald 
Cc: Bart Van Assche 
Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 10 ++
 block/blk-mq.c |  5 +++--
 block/blk-timeout.c|  2 +-
 fs/block_dev.c |  4 ++--
 include/linux/blkdev.h |  7 ++-
 5 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a5011c824ac6..7d5040a6d5a4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -766,7 +766,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-int blk_queue_enter(struct request_queue *q, bool nowait)
+int blk_queue_enter(struct request_queue *q, unsigned flags)
 {
while (true) {
int ret;
@@ -774,7 +774,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
 
-   if (nowait)
+   if (flags & BLK_REQ_NOWAIT)
return -EBUSY;
 
/*
@@ -1408,7 +1408,8 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
 
-   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM));
+   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM) ?
+   BLK_REQ_NOWAIT : 0);
if (ret)
return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
@@ -2215,7 +2216,8 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bio->bi_disk->queue;
 
-   if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
+   if (likely(blk_queue_enter(q, (bio->bi_opf & REQ_NOWAIT) ?
+   BLK_REQ_NOWAIT : 0) == 0)) {
struct bio_list lower, same;
 
/* Create a fresh bio_list for all subordinate requests 
*/
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 10c1f49f663d..45bff90e08f7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,7 +384,8 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, unsigned int op,
struct request *rq;
int ret;
 
-   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
+   ret = blk_queue_enter(q, (flags & BLK_MQ_REQ_NOWAIT) ?
+   BLK_REQ_NOWAIT : 0);
if (ret)
return ERR_PTR(ret);
 
@@ -423,7 +424,7 @@ struct request *blk_mq_alloc_request_hctx(struct 
request_queue *q,
if (hctx_idx >= q->nr_hw_queues)
return ERR_PTR(-EIO);
 
-   ret = blk_queue_enter(q, true);
+   ret = blk_queue_enter(q, BLK_REQ_NOWAIT);
if (ret)
return ERR_PTR(ret);
 
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 17ec83bb0900..e803106a5e5b 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -134,7 +134,7 @@ void blk_timeout_work(struct work_struct *work)
struct request *rq, *tmp;
int next_set = 0;
 
-   if (blk_queue_enter(q, true))
+   if (blk_queue_enter(q, BLK_REQ_NOWAIT))
return;
spin_lock_irqsave(q->queue_lock, flags);
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 93d088ffc05c..98cf2d7ee9d3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -674,7 +674,7 @@ int bdev_read_page(struct block_device *bdev, sector_t 
sector,
if (!ops->rw_page || bdev_get_integrity(bdev))
return result;
 
-   result = blk_queue_enter(bdev->bd_queue, false);
+   result = blk_queue_enter(bdev->bd_queue, 0);
if (result)
return result;
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
@@ -710,7 +710,7 @@ int bdev_write_page(struct block_device *bdev, sector_t 
sector,
 
if (!ops->rw_page || bdev_get_integrity(bdev))
return -EOPNOTSUPP;
-   result = blk_queue_enter(bdev->bd_queue, false);
+   result = blk_queue_enter(bdev->bd_queue, 0);
if (result)
return result;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 02fa42d24b52..127f64c7012c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -858,6 +858,11 @@ enum {
BLKPREP_INVALID,/* invalid command, kill, return -EREMOTEIO */
 };
 
+/* passed to blk_queue_enter */
+enum {
+   BLK_REQ_NOWAIT = (1 << 0),
+};
+
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
 
 /*
@@ -963,7 +968,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct 
gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 struct scsi_ioctl_command __user *);
 
-extern int blk_queue_enter(struct request_queue *q, boo

[PATCH v2 13/21] lpfc: Fix oops if nvmet_fc_register_targetport fails

2017-09-29 Thread James Smart
From: Dick Kennedy 

If nvmet targetport registration fails, the driver encounters a
NULL pointer oops in lpfc_hb_timeout_handler.

To fix: if registration fails, ensure nvmet_support is cleared
on the port structure.

Also enhanced the log message on failure.

Cc:  # 4.12+
Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index e410a6f1e2c2..1746c888beac 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1153,9 +1153,14 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
 #endif
if (error) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
-   "6025 Cannot register NVME targetport "
-   "x%x\n", error);
+   "6025 Cannot register NVME targetport x%x: "
+   "portnm %llx nodenm %llx segs %d qs %d\n",
+   error,
+   pinfo.port_name, pinfo.node_name,
+   lpfc_tgttemplate.max_sgl_segments,
+   lpfc_tgttemplate.max_hw_queues);
phba->targetport = NULL;
+   phba->nvmet_support = 0;
 
lpfc_nvmet_cleanup_io_context(phba);
 
@@ -1167,9 +1172,11 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
"6026 Registered NVME "
"targetport: %p, private %p "
-   "portnm %llx nodenm %llx\n",
+   "portnm %llx nodenm %llx segs %d qs %d\n",
phba->targetport, tgtp,
-   pinfo.port_name, pinfo.node_name);
+   pinfo.port_name, pinfo.node_name,
+   lpfc_tgttemplate.max_sgl_segments,
+   lpfc_tgttemplate.max_hw_queues);
 
atomic_set(&tgtp->rcv_ls_req_in, 0);
atomic_set(&tgtp->rcv_ls_req_out, 0);
-- 
2.13.1



[PATCH v2 12/21] lpfc: Revise NVME module parameter descriptions for better clarity

2017-09-29 Thread James Smart
From: Dick Kennedy 

The descriptions for lpfc_xri_split and lpfc_enable_fc4_type were
poor. Revise for better understanding:
  lpfc_xri_split - Percentage of FCP XRI resources versus NVME
  lpfc_enable_fc4_type - Enable FC4 Protocol support - FCP / NVME

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_attr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index f02269b46049..65886646a16f 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -3375,7 +3375,7 @@ LPFC_ATTR_R(nvmet_mrq,
  */
 LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH,
-   "Define fc4 type to register with fabric.");
+   "Enable FC4 Protocol support - FCP / NVME");
 
 /*
  * lpfc_xri_split: Defines the division of XRI resources between SCSI and NVME
@@ -3391,7 +3391,7 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP,
  * percentage will go to NVME.
  */
 LPFC_ATTR_R(xri_split, 50, 10, 90,
-   "Division of XRI resources between SCSI and NVME");
+   "Percentage of FCP XRI resources versus NVME");
 
 /*
 # lpfc_log_verbose: Only turn this flag on if you are willing to risk being
-- 
2.13.1



[PATCH v2 18/21] lpfc: Extend RDP support

2017-09-29 Thread James Smart
From: Dick Kennedy 

Support RDP and Multiple Frames

If the remote Nport is not logged in, the driver would not populate
all the descriptors in the RDP response payload. Doing so would create
a payload length that requires multiple frames due to exceeding the
default rx buffer size without an explicit login. Currently FC-LS
explicitly states the RDP response must be a single frame sequence.
Thus we did not violate the standard.

Recently, a modification to FC-LS was accepted which allows
multi-frame sequences and all vendors have indicated they are
interoperable with the change. As such, extend RDP support with the
additional fields and send a multi-frame sequence.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_els.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 3ebf6ccba6e6..b14f7c5653cd 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -5394,10 +5394,6 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct 
lpfc_rdp_context *rdp_context,
(len + pcmd), vport, ndlp);
len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd),
&rdp_context->link_stat);
-   /* Check if nport is logged, BZ190632 */
-   if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED))
-   goto lpfc_skip_descriptor;
-
len += lpfc_rdp_res_bbc_desc((struct fc_rdp_bbc_desc *)(len + pcmd),
 &rdp_context->link_stat, vport);
len += lpfc_rdp_res_oed_temp_desc(phba,
@@ -5418,7 +5414,6 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct 
lpfc_rdp_context *rdp_context,
len += lpfc_rdp_res_opd_desc((struct fc_rdp_opd_sfp_desc *)(len + pcmd),
 rdp_context->page_a0, vport);
 
-lpfc_skip_descriptor:
rdp_res->length = cpu_to_be32(len - 8);
elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
 
@@ -5540,7 +5535,6 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct 
lpfc_iocbq *cmdiocb,
pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
rdp_req = (struct fc_rdp_req_frame *) pcmd->virt;
 
-
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
 "2422 ELS RDP Request "
 "dec len %d tag x%x port_id %d len %d\n",
@@ -5549,12 +5543,6 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct 
lpfc_iocbq *cmdiocb,
 be32_to_cpu(rdp_req->nport_id_desc.nport_id),
 be32_to_cpu(rdp_req->nport_id_desc.length));
 
-   if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED) &&
-   !phba->cfg_enable_SmartSAN) {
-   rjt_err = LSRJT_UNABLE_TPC;
-   rjt_expl = LSEXP_PORT_LOGIN_REQ;
-   goto error;
-   }
if (sizeof(struct fc_rdp_nport_desc) !=
be32_to_cpu(rdp_req->rdp_des_length))
goto rjt_logerr;
-- 
2.13.1



[PATCH v2 19/21] lpfc: Fix oops of nvme host during driver unload.

2017-09-29 Thread James Smart
From: Dick Kennedy 

When running NVME I/O as an NVME host, unloading the driver would
cause an oops in lpfc_sli4_issue_wqe.

When unloading, controllers are torn down and the transport initiates
set_property commands to reset the controller and issues aborts to
terminate existing I/O.  The driver's NVME abort and FCP I/O submit
routines needed to recognize that the driver is unloading and fail the
new requests. They didn't, resulting in the oops.

Revise the LS and FCP I/O submit routines to detect the unloading
state and properly handle their cleanup.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
v2: reduce scope to only submit routines. aborts necessary for cleanup.
  this was a bug found after submit.

 drivers/scsi/lpfc/lpfc_nvme.c  |  8 
 drivers/scsi/lpfc/lpfc_nvmet.c | 11 +++
 2 files changed, 19 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index e3642c1890ea..040af28073bb 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -416,6 +416,9 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
vport = lport->vport;
 
+   if (vport->load_flag & FC_UNLOADING)
+   return -ENODEV;
+
ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
@@ -1252,6 +1255,11 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port 
*pnvme_lport,
vport = lport->vport;
phba = vport->phba;
 
+   if (vport->load_flag & FC_UNLOADING) {
+   ret = -ENODEV;
+   goto out_fail;
+   }
+
/* Validate pointers. */
if (!pnvme_lport || !pnvme_rport || !freqpriv) {
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR | LOG_NODE,
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 1746c888beac..d02a54d7ffc0 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -632,6 +632,9 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
struct ulp_bde64 bpl;
int rc;
 
+   if (phba->pport->load_flag & FC_UNLOADING)
+   return -ENODEV;
+
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
"6023 NVMET LS rsp oxid x%x\n", ctxp->oxid);
 
@@ -713,6 +716,11 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
struct lpfc_iocbq *nvmewqeq;
int rc;
 
+   if (phba->pport->load_flag & FC_UNLOADING) {
+   rc = -ENODEV;
+   goto aerr;
+   }
+
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (ctxp->ts_cmd_nvme) {
if (rsp->op == NVMET_FCOP_RSP)
@@ -812,6 +820,9 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port 
*tgtport,
struct lpfc_hba *phba = ctxp->phba;
unsigned long flags;
 
+   if (phba->pport->load_flag & FC_UNLOADING)
+   return;
+
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6103 NVMET Abort op: oxri x%x flg x%x ste %d\n",
ctxp->oxid, ctxp->flag, ctxp->state);
-- 
2.13.1



[PATCH v2 11/21] lpfc: Set missing abort context

2017-09-29 Thread James Smart
Always set ctxp->state to LPFC_NVMET_STE_ABORT if ABORT op gets called

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 354e60043ea1..e410a6f1e2c2 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -822,6 +822,7 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port 
*tgtport,
atomic_inc(&lpfc_nvmep->xmt_fcp_abort);
 
spin_lock_irqsave(&ctxp->ctxlock, flags);
+   ctxp->state = LPFC_NVMET_STE_ABORT;
 
/* Since iaab/iaar are NOT set, we need to check
 * if the firmware is in process of aborting IO
-- 
2.13.1



[PATCH v2 20/21] lpfc: correct nvme sg segment count check

2017-09-29 Thread James Smart
The internal cfg flag is actually smaller, by 1 (for a partial page SGE),
than the sg list maintained by the driver, so the check on sg segments
errored out when it shouldn't have. For example, if cfg_nvme_seg_cnt were
64, the transport may legitimately send 65 segments (64 full plus one
partial page), so rejecting anything above 64 is too strict.

Ensure the check is cfg + 1.

Note: carrying a value that is one less than what it really is is bogus.
 Correcting it now would be a significant rework. Add this item to the
 list to be refactored in the merge with efct.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_nvme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 040af28073bb..05ab731c408a 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1149,12 +1149,12 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
 
first_data_sgl = sgl;
lpfc_ncmd->seg_cnt = nCmd->sg_cnt;
-   if (lpfc_ncmd->seg_cnt > phba->cfg_nvme_seg_cnt) {
+   if (lpfc_ncmd->seg_cnt > phba->cfg_nvme_seg_cnt + 1) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6058 Too many sg segments from "
"NVME Transport.  Max %d, "
"nvmeIO sg_cnt %d\n",
-   phba->cfg_nvme_seg_cnt,
+   phba->cfg_nvme_seg_cnt + 1,
lpfc_ncmd->seg_cnt);
lpfc_ncmd->seg_cnt = 0;
return 1;
-- 
2.13.1



[PATCH v2 14/21] lpfc: Disable NPIV support if NVME is enabled

2017-09-29 Thread James Smart
From: Dick Kennedy 

The initial implementation of NVME didn't merge with NPIV support.
As such, there are several issues if NPIV is used with NVME. For now,
ensure that if NVME is enabled then NPIV is not enabled.

Support for NPIV with NVME will be added in the near future.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_vport.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index c714482bf4c5..c9d33b1268cb 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -313,6 +313,15 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
goto error_out;
}
 
+   /* NPIV is not supported if HBA has NVME enabled */
+   if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+   lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
+   "3189 Create VPORT failed: "
+   "NPIV is not supported on NVME\n");
+   rc = VPORT_INVAL;
+   goto error_out;
+   }
+
vpi = lpfc_alloc_vpi(phba);
if (vpi == 0) {
lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
-- 
2.13.1



[PATCH v2 17/21] lpfc: Ensure io aborts interlocked with the target.

2017-09-29 Thread James Smart
From: Dick Kennedy 

Before releasing nvme io back to the io stack for possible
retry on other paths, ensure the io termination is interlocked
with the target device by ensuring the entire ABTS-LS protocol is
complete.

Additionally, FC-NVME ABTS-LS protocol does not use RRQ. Remove RRQ
behavior from ABTS-LS.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_nvme.c | 59 +--
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 02ba06f364c4..e3642c1890ea 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -850,7 +850,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *pwqeIn,
} else {
lpfc_ncmd->status = (bf_get(lpfc_wcqe_c_status, wcqe) &
LPFC_IOCB_STATUS_MASK);
-   lpfc_ncmd->result = wcqe->parameter;
+   lpfc_ncmd->result = (wcqe->parameter & IOERR_PARAM_MASK);
 
/* For NVME, the only failure path that results in an
 * IO error is when the adapter rejects it.  All other
@@ -884,6 +884,17 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *pwqeIn,
 lpfc_ncmd->status, lpfc_ncmd->result,
 wcqe->total_data_placed);
break;
+   case IOSTAT_LOCAL_REJECT:
+   /* Let fall through to set command final state. */
+   if (lpfc_ncmd->result == IOERR_ABORT_REQUESTED)
+   lpfc_printf_vlog(vport, KERN_INFO,
+LOG_NVME_IOERR,
+"6032 Delay Aborted cmd %p "
+"nvme cmd %p, xri x%x, "
+"xb %d\n",
+lpfc_ncmd, nCmd,
+lpfc_ncmd->cur_iocbq.sli4_xritag,
+bf_get(lpfc_wcqe_c_xb, wcqe));
default:
 out_err:
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
@@ -930,12 +941,18 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *pwqeIn,
 #endif
freqpriv = nCmd->private;
freqpriv->nvme_buf = NULL;
-   nCmd->done(nCmd);
+
+   /* NVME targets need completion held off until the abort exchange
+* completes.
+*/
+   if (!lpfc_ncmd->flags & LPFC_SBUF_XBUSY)
+   nCmd->done(nCmd);
 
spin_lock_irqsave(&phba->hbalock, flags);
lpfc_ncmd->nrport = NULL;
spin_unlock_irqrestore(&phba->hbalock, flags);
 
+   /* Call release with XB=1 to queue the IO into the abort list. */
lpfc_release_nvme_buf(phba, lpfc_ncmd);
 }
 
@@ -2064,9 +2081,6 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct 
lpfc_nodelist *ndlp)
spin_lock_irqsave(&phba->nvme_buf_list_get_lock, iflag);
list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
 &phba->lpfc_nvme_buf_list_get, list) {
-   if (lpfc_test_rrq_active(phba, ndlp,
-lpfc_ncmd->cur_iocbq.sli4_lxritag))
-   continue;
list_del_init(&lpfc_ncmd->list);
found = 1;
break;
@@ -2079,9 +2093,6 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct 
lpfc_nodelist *ndlp)
spin_unlock(&phba->nvme_buf_list_put_lock);
list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
 &phba->lpfc_nvme_buf_list_get, list) {
-   if (lpfc_test_rrq_active(
-   phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag))
-   continue;
list_del_init(&lpfc_ncmd->list);
found = 1;
break;
@@ -2118,7 +2129,6 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct 
lpfc_nvme_buf *lpfc_ncmd)
 
spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
iflag);
-   lpfc_ncmd->nvmeCmd = NULL;
list_add_tail(&lpfc_ncmd->list,
&phba->sli4_hba.lpfc_abts_nvme_buf_list);
spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock,
@@ -2486,18 +2496,18 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp)
  * @axri: pointer to the fcp xri abort wcqe structure.
  *
  * This routine is invoked by the worker thread to process a SLI4 fast-path
- * FCP aborted xri.
+ * NVME aborted xri.  Aborted NVME IO commands are completed to the transport
+ * here.
  **/
 void
 lpfc_sli4_nvme_xri_aborted(struct l

[PATCH v2 08/21] lpfc: Move CQ processing to a soft IRQ

2017-09-29 Thread James Smart
From: Dick Kennedy 

Under prolonged heavy NVME target load, the lpfc IRQ handler
encounters CPU lockup warnings.

Convert the driver to a shortened ISR handler which identifies the
interrupting condition and then schedules a workqueue item to process
the completion queue the interrupt was for. This moves all the real
work into the workqueue element.

As the nvmet_fc upcalls are no longer made in ISR context, don't set
the *_IN_ISR target feature flags.
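
A hedged sketch of the deferral pattern (identifiers are illustrative,
not the exact lpfc symbols): the hard IRQ handler only identifies the
interrupting CQ and queues work; the CQE walk happens in process
context on phba->wq.

static void lpfc_cq_work(struct work_struct *work)
{
	struct lpfc_queue *cq = container_of(work, struct lpfc_queue, irqwork);

	/* walk and process the completion queue entries here */
}

/* in the shortened ISR, for the CQ that raised the interrupt: */
queue_work(phba->wq, &cq->irqwork);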

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc.h   |   3 +
 drivers/scsi/lpfc/lpfc_init.c  |  15 +
 drivers/scsi/lpfc/lpfc_nvmet.c |   4 +-
 drivers/scsi/lpfc/lpfc_sli.c   | 148 -
 drivers/scsi/lpfc/lpfc_sli4.h  |   4 +-
 5 files changed, 109 insertions(+), 65 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 8eb3f96fe068..231302273257 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -23,6 +23,7 @@
 
 #include 
 #include 
+#include 
 
 #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_SCSI_LPFC_DEBUG_FS)
 #define CONFIG_SCSI_LPFC_DEBUG_FS
@@ -653,6 +654,8 @@ struct lpfc_hba {
/* SLI4 specific HBA data structure */
struct lpfc_sli4_hba sli4_hba;
 
+   struct workqueue_struct *wq;
+
struct lpfc_sli sli;
uint8_t pci_dev_grp;/* lpfc PCI dev group: 0x0, 0x1, 0x2,... */
uint32_t sli_rev;   /* SLI2, SLI3, or SLI4 */
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index b50c3b559a7a..4ffdde5808ee 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3216,6 +3216,9 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
lpfc_destroy_vport_work_array(phba, vports);
 
lpfc_sli_mbox_sys_shutdown(phba, mbx_action);
+
+   if (phba->wq)
+   flush_workqueue(phba->wq);
 }
 
 /**
@@ -4176,6 +4179,9 @@ void
 lpfc_stop_port(struct lpfc_hba *phba)
 {
phba->lpfc_stop_port(phba);
+
+   if (phba->wq)
+   flush_workqueue(phba->wq);
 }
 
 /**
@@ -6369,6 +6375,9 @@ lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
return error;
}
 
+   /* workqueue for deferred irq use */
+   phba->wq = alloc_workqueue("lpfc_wq", WQ_MEM_RECLAIM, 0);
+
return 0;
 }
 
@@ -6383,6 +6392,12 @@ lpfc_setup_driver_resource_phase2(struct lpfc_hba *phba)
 static void
 lpfc_unset_driver_resource_phase2(struct lpfc_hba *phba)
 {
+   if (phba->wq) {
+   flush_workqueue(phba->wq);
+   destroy_workqueue(phba->wq);
+   phba->wq = NULL;
+   }
+
/* Stop kernel worker thread */
kthread_stop(phba->worker_thread);
 }
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index cee88f3c0779..a04b275f1b18 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1137,9 +1137,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
}
lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel;
-   lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
-  NVMET_FCTGTFEAT_CMD_IN_ISR |
-  NVMET_FCTGTFEAT_OPDONE_IN_ISR;
+   lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP;
 
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 3ace438d8f2e..a69332f0abcd 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -80,8 +80,8 @@ static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct 
lpfc_queue *,
struct lpfc_cqe *);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
   int);
-static int lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba,
-   struct lpfc_eqe *eqe, uint32_t qidx);
+static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba,
+struct lpfc_eqe *eqe, uint32_t qidx);
 static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba);
 static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba);
 static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba,
@@ -13022,14 +13022,11 @@ lpfc_sli4_sp_handle_cqe(struct lpfc_hba *phba, struct 
lpfc_queue *cq,
  * completion queue, and then return.
  *
  **/
-static int
+static void
 lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
struct lpfc_queue *speq)
 {
struct lpfc_queue *cq = NULL, *childq;
-   struct lpfc_cqe *cqe;
-   bool workposted = false;
-   int ecount = 0;
uint16_t cqid;
 
/* Get the reference to the 

[PATCH v2 21/21] lpfc: change version to 11.4.0.4

2017-09-29 Thread James Smart
From: Dick Kennedy 

Change version to 11.4.0.4

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 6aa192b3e4bf..e0181371af09 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package. *
  ***/
 
-#define LPFC_DRIVER_VERSION "11.4.0.3"
+#define LPFC_DRIVER_VERSION "11.4.0.4"
 #define LPFC_DRIVER_NAME   "lpfc"
 
 /* Used for SLI 2/3 */
-- 
2.13.1



[PATCH v2 16/21] lpfc: Fix secure firmware updates

2017-09-29 Thread James Smart
From: Dick Kennedy 

Firmware update fails with: status x17 add_status x56 on the final
write.

If multiple DMA buffers are used for the download, some firmware revs
have difficulty with signatures and CRCs split across the DMA buffer
boundaries.  Resolve by making every write a single 4k page in length.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_hw4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 1db0a38683f4..2b145966c73f 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -3636,7 +3636,7 @@ struct lpfc_mbx_get_port_name {
 #define MB_CEQ_STATUS_QUEUE_FLUSHING   0x4
 #define MB_CQE_STATUS_DMA_FAILED   0x5
 
-#define LPFC_MBX_WR_CONFIG_MAX_BDE 8
+#define LPFC_MBX_WR_CONFIG_MAX_BDE 1
 struct lpfc_mbx_wr_object {
struct mbox_header header;
union {
-- 
2.13.1



[PATCH v2 04/21] lpfc: Fix lpfc nvme host rejecting IO with Not Ready message

2017-09-29 Thread James Smart
From: Dick Kennedy 

In a link bounce scenario, a condition can occur where the
discovery engine swaps an ndlp structure (address change for
an nport). While the swap was successfully executed by the
discovery engine, the driver did not properly detect a change in
the ndlp bound to the nvme rport.  This error resulted in the nvme
host transport issuing an I/O to the correct nvme rport, but the
lpfc driver addressed an ndlp with NLP_UNUSED status and failed
the I/O. This made it look like there were missing namespaces, and
applications failed due to I/O errors.

To fix, in lpfc_nvme_register_rport, rework the "rebind" case
to break the nvme rport<->ndlp association when the ndlp
already has an nrport. Then rebind the rport to the correct
ndlp data and backpointers.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_nvme.c | 46 ++-
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 79ba3ce063a4..2ad23b356bfe 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2296,6 +2296,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp)
struct lpfc_nvme_rport *rport;
struct nvme_fc_remote_port *remote_port;
struct nvme_fc_port_info rpinfo;
+   struct lpfc_nodelist *prev_ndlp;
 
lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC,
 "6006 Register NVME PORT. DID x%06x nlptype x%x\n",
@@ -2332,7 +2333,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp)
 * new rport.
 */
rport = remote_port->private;
-   if (ndlp->nrport == rport) {
+   if (ndlp->nrport) {
lpfc_printf_vlog(ndlp->vport, KERN_INFO,
 LOG_NVME_DISC,
 "6014 Rebinding lport to "
@@ -2343,24 +2344,33 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp)
 remote_port->port_role,
 ndlp->nlp_type,
 ndlp->nlp_DID);
-   } else {
-   /* New rport. */
-   rport->remoteport = remote_port;
-   rport->lport = lport;
-   rport->ndlp = lpfc_nlp_get(ndlp);
-   if (!rport->ndlp)
-   return -1;
-   ndlp->nrport = rport;
-   lpfc_printf_vlog(vport, KERN_INFO,
-LOG_NVME_DISC | LOG_NODE,
-"6022 Binding new rport to "
-"lport %p Rport WWNN 0x%llx, "
-"Rport WWPN 0x%llx DID "
-"x%06x Role x%x\n",
-lport,
-rpinfo.node_name, rpinfo.port_name,
-rpinfo.port_id, rpinfo.port_role);
+   prev_ndlp = rport->ndlp;
+
+   /* Sever the ndlp<->rport connection before dropping
+* the ndlp ref from register.
+*/
+   ndlp->nrport = NULL;
+   rport->ndlp = NULL;
+   if (prev_ndlp)
+   lpfc_nlp_put(ndlp);
}
+
+   /* Clean bind the rport to the ndlp. */
+   rport->remoteport = remote_port;
+   rport->lport = lport;
+   rport->ndlp = lpfc_nlp_get(ndlp);
+   if (!rport->ndlp)
+   return -1;
+   ndlp->nrport = rport;
+   lpfc_printf_vlog(vport, KERN_INFO,
+LOG_NVME_DISC | LOG_NODE,
+"6022 Binding new rport to "
+"lport %p Rport WWNN 0x%llx, "
+"Rport WWPN 0x%llx DID "
+"x%06x Role x%x\n",
+lport,
+rpinfo.node_name, rpinfo.port_name,
+rpinfo.port_id, rpinfo.port_role);
} else {
lpfc_printf_vlog(vport, KERN_ERR,
 LOG_NVME_DISC | LOG_NODE,
-- 
2.13.1



[PATCH v2 10/21] lpfc: Reduce log spew on controller reconnects

2017-09-29 Thread James Smart
There are several log messages that report abnormal
terminations that by default are marked warn. These are
typically the result of failures due to invalid controller
state or abort completions. They are all natural when a
controller resets.

Unfortunately, as they are logged by default, it makes the
admin very concerned.

Convert the messages to Info.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_nvme.c  |  6 +++---
 drivers/scsi/lpfc/lpfc_nvmet.c | 16 ++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index a83b57254de6..e6948945a2f2 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -886,7 +886,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *pwqeIn,
break;
default:
 out_err:
-   lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
+   lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
 "6072 NVME Completion Error: xri %x "
 "status x%x result x%x placed x%x\n",
 lpfc_ncmd->cur_iocbq.sli4_xritag,
@@ -1339,7 +1339,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port 
*pnvme_lport,
ret = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, &lpfc_ncmd->cur_iocbq);
if (ret) {
atomic_dec(&ndlp->cmd_pending);
-   lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
+   lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
 "6113 FCP could not issue WQE err %x "
 "sid: x%x did: x%x oxid: x%x\n",
 ret, vport->fc_myDID, ndlp->nlp_DID,
@@ -1399,7 +1399,7 @@ void
 lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
   struct lpfc_wcqe_complete *abts_cmpl)
 {
-   lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+   lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
"6145 ABORT_XRI_CN completing on rpi x%x "
"original iotag x%x, abort cmd iotag x%x "
"req_tag x%x, status x%x, hwstatus x%x\n",
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index a04b275f1b18..354e60043ea1 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -76,7 +76,7 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct 
lpfc_nvmet_rcv_ctx *ctxp)
 {
unsigned long iflag;
 
-   lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+   lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6313 NVMET Defer ctx release xri x%x flg x%x\n",
ctxp->oxid, ctxp->flag);
 
@@ -494,7 +494,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct 
lpfc_iocbq *cmdwqe,
struct lpfc_nvmet_tgtport *tgtp;
struct nvmefc_tgt_fcp_req *rsp;
struct lpfc_nvmet_rcv_ctx *ctxp;
-   uint32_t status, result, op, start_clean;
+   uint32_t status, result, op, start_clean, logerr;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint32_t id;
 #endif
@@ -522,17 +522,21 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct 
lpfc_iocbq *cmdwqe,
if (tgtp)
atomic_inc(&tgtp->xmt_fcp_rsp_error);
 
+   logerr = LOG_NVME_IOERR;
+
/* pick up SLI4 exhange busy condition */
if (bf_get(lpfc_wcqe_c_xb, wcqe)) {
ctxp->flag |= LPFC_NVMET_XBUSY;
+   logerr |= LOG_NVME_ABTS;
 
-   lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-   "6315 IO Cmpl XBUSY: xri x%x: %x/%x\n",
-   ctxp->oxid, status, result);
} else {
ctxp->flag &= ~LPFC_NVMET_XBUSY;
}
 
+   lpfc_printf_log(phba, KERN_INFO, logerr,
+   "6315 IO Error Cmpl xri x%x: %x/%x XBUSY:x%x\n",
+   ctxp->oxid, status, result, ctxp->flag);
+
} else {
rsp->fcp_error = NVME_SC_SUCCESS;
if (op == NVMET_FCOP_RSP)
@@ -2323,7 +2327,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, 
struct lpfc_iocbq *cmdwqe,
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
atomic_inc(&tgtp->xmt_abort_rsp);
 
-   lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
+   lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6165 ABORT cmpl: xri x%x flg x%x (%d) "
"WCQE: %08x %08x %08x %08x\n",
ctxp->oxid, ctxp->flag, released,
-- 
2.13.1



[PATCH v2 05/21] lpfc: Fix warning messages when NVME_TARGET_FC not defined

2017-09-29 Thread James Smart
From: Dick Kennedy 

Warning messages are seen on ppc builds when NVME_TARGET_FC is not
defined.

The lpfc_nvmet_replenish_context() function is only meaningful when
NVME target mode is enabled. Surround the function body with ifdefs
for target mode enablement.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reported-by: Stephen Rothwell 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 346af470f360..b6cd213567ac 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1438,6 +1438,7 @@ static struct lpfc_nvmet_ctxbuf *
 lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
 struct lpfc_nvmet_ctx_info *current_infop)
 {
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_nvmet_ctxbuf *ctx_buf = NULL;
struct lpfc_nvmet_ctx_info *get_infop;
int i;
@@ -1485,6 +1486,7 @@ lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
get_infop = get_infop->nvmet_ctx_next_cpu;
}
 
+#endif
/* Nothing found, all contexts for the MRQ are in-flight */
return NULL;
 }
-- 
2.13.1



[PATCH v2 02/21] lpfc: fix pci hot plug crash in list_add call

2017-09-29 Thread James Smart
From: Dick Kennedy 

During pci hot plug, the kernel crashes in a list_add call.

The lookup by tag function will return null if the IOCB is out of
range or does not have the on txcmplq flag set.

Fix: Check for null return from lookup by tag.

Cc:  # 4.12+
Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 

---
v2: original v1 hot plug patch split into 2 patches

 drivers/scsi/lpfc/lpfc_sli.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 8b119f87b51d..b8513c1adcef 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -12507,19 +12507,21 @@ lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *phba,
/* Look up the ELS command IOCB and create pseudo response IOCB */
cmdiocbq = lpfc_sli_iocbq_lookup_by_tag(phba, pring,
bf_get(lpfc_wcqe_c_request_tag, wcqe));
-   /* Put the iocb back on the txcmplq */
-   lpfc_sli_ringtxcmpl_put(phba, pring, cmdiocbq);
-   spin_unlock_irqrestore(&pring->ring_lock, iflags);
-
if (unlikely(!cmdiocbq)) {
+   spin_unlock_irqrestore(&pring->ring_lock, iflags);
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
"0386 ELS complete with no corresponding "
-   "cmdiocb: iotag (%d)\n",
-   bf_get(lpfc_wcqe_c_request_tag, wcqe));
+   "cmdiocb: 0x%x 0x%x 0x%x 0x%x\n",
+   wcqe->word0, wcqe->total_data_placed,
+   wcqe->parameter, wcqe->word3);
lpfc_sli_release_iocbq(phba, irspiocbq);
return NULL;
}
 
+   /* Put the iocb back on the txcmplq */
+   lpfc_sli_ringtxcmpl_put(phba, pring, cmdiocbq);
+   spin_unlock_irqrestore(&pring->ring_lock, iflags);
+
/* Fake the irspiocbq and copy necessary response information */
lpfc_sli4_iocb_param_transfer(phba, irspiocbq, cmdiocbq, wcqe);
 
@@ -17137,7 +17139,8 @@ lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
if (pcmd && pcmd->virt)
dma_pool_free(phba->lpfc_drb_pool, pcmd->virt, pcmd->phys);
kfree(pcmd);
-   lpfc_sli_release_iocbq(phba, iocbq);
+   if (iocbq)
+   lpfc_sli_release_iocbq(phba, iocbq);
lpfc_in_buf_free(phba, &dmabuf->dbuf);
 }
 
-- 
2.13.1



[PATCH v2 09/21] lpfc: Fix FCP hba_wqidx assignment

2017-09-29 Thread James Smart
From: Dick Kennedy 

The driver is encountering an oops in lpfc_sli_calc_ring.

The driver is setting hba_wqidx for FCP based on the
policy in use for NVME. The two may not be the same.
Change to set the wqidx based on the FCP policy.
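
For readers following the discussion, a minimal userspace sketch of the
clamping the fix introduces is shown below; the names are illustrative
only, not the lpfc driver's API:

#include <stdio.h>

/*
 * Whatever distribution policy produced raw_idx, the final work-queue
 * index must stay within the number of FCP work queues actually
 * configured, hence the modulo.
 */
static unsigned int fcp_wqidx(unsigned int raw_idx, unsigned int fcp_io_channels)
{
        return fcp_io_channels ? raw_idx % fcp_io_channels : 0;
}

int main(void)
{
        /* e.g. the NVME policy picked index 7, but only 4 FCP WQs exist */
        printf("wqidx = %u\n", fcp_wqidx(7, 4));        /* prints "wqidx = 3" */
        return 0;
}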

Cc:  # 4.12+
Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_sli.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index a69332f0abcd..e50f1c1e484d 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -9396,10 +9396,13 @@ lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct 
lpfc_iocbq *piocb)
 * for abort iocb hba_wqidx should already
 * be setup based on what work queue we used.
 */
-   if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX))
+   if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
piocb->hba_wqidx =
lpfc_sli4_scmd_to_wqidx_distr(phba,
  piocb->context1);
+   piocb->hba_wqidx = piocb->hba_wqidx %
+   phba->cfg_fcp_io_channel;
+   }
return phba->sli4_hba.fcp_wq[piocb->hba_wqidx]->pring;
} else {
if (unlikely(!phba->sli4_hba.oas_wq))
-- 
2.13.1



[PATCH v2 06/21] lpfc: PLOGI failures during NPIV testing

2017-09-29 Thread James Smart
From: Dick Kennedy 

Local Reject/Invalid RPI errors seen during discovery.

Temporary RPI cleanup was occurring regardless of SLI rev. It's
only necessary on SLI-4.

Adjust the test for whether cleanup is necessary.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_hbadisc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 499df9d17339..d9a03beb76a4 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4983,7 +4983,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp)
lpfc_cancel_retry_delay_tmo(vport, ndlp);
if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
!(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) &&
-   !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
+   !(ndlp->nlp_flag & NLP_RPI_REGISTERED) &&
+   phba->sli_rev != LPFC_SLI_REV4) {
/* For this case we need to cleanup the default rpi
 * allocated by the firmware.
 */
-- 
2.13.1



[PATCH v2 15/21] lpfc: Fix crash in lpfc_nvme_fcp_io_submit during LIP

2017-09-29 Thread James Smart
From: Dick Kennedy 

The driver is seeing a NULL pointer in lpfc_nvme_fcp_io_submit.
This was ultimately due to a transport AER being sent on a terminated
controller, so some of the values were not set. In case we are on a
system without a corrected transport, or a race occurs where we enter
the routine while the teardown is happening in a separate thread,
validate the parameters before starting the I/O.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_nvme.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index e6948945a2f2..02ba06f364c4 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1235,6 +1235,16 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port 
*pnvme_lport,
vport = lport->vport;
phba = vport->phba;
 
+   /* Validate pointers. */
+   if (!pnvme_lport || !pnvme_rport || !freqpriv) {
+   lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR | LOG_NODE,
+"6117 No Send:IO submit ptrs NULL, lport %p, "
+"rport %p fcreq_priv %p\n",
+pnvme_lport, pnvme_rport, freqpriv);
+   ret = -ENODEV;
+   goto out_fail;
+   }
+
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (phba->ktime_on)
start = ktime_get_ns();
-- 
2.13.1



[PATCH v2 01/21] lpfc: fix pci hot plug crash in timer management routines

2017-09-29 Thread James Smart
From: Dick Kennedy 

During pci hot plug, the kernel crashes in timer management code.

The sli4 remove_one handler is not stopping the timers as it starts to
remove the port so that it can be swapped.

Fix: Stop the timers early in the handler routine.

Note: Fix in SLI-4 only. SLI-3 already stopped the timers properly.
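
As a side note, a toy userspace model of the ordering issue being fixed
is sketched below; it is purely illustrative and not the lpfc code:

#include <stdio.h>
#include <stdlib.h>

/* Toy port object whose "timer callback" dereferences the port. */
struct port {
        int timer_armed;
};

static void port_timer_fires(struct port *p)
{
        if (p->timer_armed)                     /* dereferences *p */
                printf("timer fired for port %p\n", (void *)p);
}

static void remove_port(struct port *p)
{
        p->timer_armed = 0;     /* 1: stop the timers first (lpfc_stop_hba_timers() in the patch) */
        free(p);                /* 2: only now is freeing *p safe */
}

int main(void)
{
        struct port *p = calloc(1, sizeof(*p));

        if (!p)
                return 1;
        p->timer_armed = 1;
        port_timer_fires(p);    /* fine: the port is still alive */
        remove_port(p);         /* stop, then free - never the other way around */
        return 0;
}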

Cc:  # 4.12+
Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 

---
v2: original v1 hot plug patch split into 2 patches

 drivers/scsi/lpfc/lpfc_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 7e7ae786121b..1773b9ce3149 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -11419,6 +11419,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev)
lpfc_debugfs_terminate(vport);
lpfc_sli4_hba_unset(phba);
 
+   lpfc_stop_hba_timers(phba);
spin_lock_irq(&phba->hbalock);
list_del_init(&vport->listentry);
spin_unlock_irq(&phba->hbalock);
-- 
2.13.1



[PATCH v2 00/21] lpfc updates for 11.4.0.4

2017-09-29 Thread James Smart
This patch set provides a number of bug fixes and additions to
the driver.

The patches were cut against Martin's 4.14/scsi-queue tree.
There are no outside dependencies.

v2:
  Split patch 1 into separate patches
  Added stable cc's where requested
  "Fix oops of nvme host during driver unload" revised to only
block submit routines due to bug. abort routines must
function to cleanup.
  Add "correct nvme sg segment count check" patch

Dick Kennedy (18):
  lpfc: fix pci hot plug crash in timer management routines
  lpfc: fix pci hot plug crash in list_add call
  lpfc: Fix crash receiving ELS while detaching driver
  lpfc: Fix lpfc nvme host rejecting IO with Not Ready message
  lpfc: Fix warning messages when NVME_TARGET_FC not defined
  lpfc: PLOGI failures during NPIV testing
  lpfc: Make ktime sampling more accurate
  lpfc: Move CQ processing to a soft IRQ
  lpfc: Fix FCP hba_wqidx assignment
  lpfc: Revise NVME module parameter descriptions for better clarity
  lpfc: Fix oops if nvmet_fc_register_targetport fails
  lpfc: Disable NPIV support if NVME is enabled
  lpfc: Fix crash in lpfc_nvme_fcp_io_submit during LIP
  lpfc: Fix secure firmware updates
  lpfc: Ensure io aborts interlocked with the target.
  lpfc: Extend RDP support
  lpfc: Fix oops of nvme host during driver unload.
  lpfc: change version to 11.4.0.4

James Smart (3):
  lpfc: Reduce log spew on controller reconnects
  lpfc: Set missing abort context
  lpfc: correct nvme sg segment count check

 drivers/scsi/lpfc/lpfc.h   |   3 +
 drivers/scsi/lpfc/lpfc_attr.c  |  10 +-
 drivers/scsi/lpfc/lpfc_bsg.c   |   4 +-
 drivers/scsi/lpfc/lpfc_els.c   |  19 ++--
 drivers/scsi/lpfc/lpfc_hbadisc.c   |   8 +-
 drivers/scsi/lpfc/lpfc_hw4.h   |   2 +-
 drivers/scsi/lpfc/lpfc_init.c  |  30 --
 drivers/scsi/lpfc/lpfc_nportdisc.c |   2 +-
 drivers/scsi/lpfc/lpfc_nvme.c  | 167 ++--
 drivers/scsi/lpfc/lpfc_nvmet.c | 148 +
 drivers/scsi/lpfc/lpfc_sli.c   | 216 +++--
 drivers/scsi/lpfc/lpfc_sli4.h  |   4 +-
 drivers/scsi/lpfc/lpfc_version.h   |   2 +-
 drivers/scsi/lpfc/lpfc_vport.c |   9 ++
 14 files changed, 405 insertions(+), 219 deletions(-)

-- 
2.13.1



[PATCH v2 03/21] lpfc: Fix crash receiving ELS while detaching driver

2017-09-29 Thread James Smart
From: Dick Kennedy 

The driver crashes when attempting to use a freed ndlp pointer.

The pci_remove_one handler runs on a separate kernel thread. The
removal starts by freeing all of the ndlps and then disabling
interrupts. In between these two events the driver can still receive
and process an ELS. When it does, the ndlp pointer it uses will be
NULL.

Change the order in pci_remove_one so that interrupts are disabled
before the ndlps are freed.

Cc:  # 4.12+
Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
Reviewed-by: Johannes Thumshirn 
---
 drivers/scsi/lpfc/lpfc_attr.c  |  6 --
 drivers/scsi/lpfc/lpfc_bsg.c   |  4 +++-
 drivers/scsi/lpfc/lpfc_els.c   |  7 ++-
 drivers/scsi/lpfc/lpfc_hbadisc.c   |  5 -
 drivers/scsi/lpfc/lpfc_init.c  | 14 +++---
 drivers/scsi/lpfc/lpfc_nportdisc.c |  2 +-
 drivers/scsi/lpfc/lpfc_sli.c   | 12 
 7 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 0806323829e6..f02269b46049 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -3132,7 +3132,8 @@ lpfc_txq_hw_show(struct device *dev, struct 
device_attribute *attr, char *buf)
struct lpfc_hba   *phba = ((struct lpfc_vport *) shost->hostdata)->phba;
struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba);
 
-   return snprintf(buf, PAGE_SIZE, "%d\n", pring->txq_max);
+   return snprintf(buf, PAGE_SIZE, "%d\n",
+   pring ? pring->txq_max : 0);
 }
 
 static DEVICE_ATTR(txq_hw, S_IRUGO,
@@ -3145,7 +3146,8 @@ lpfc_txcmplq_hw_show(struct device *dev, struct 
device_attribute *attr,
struct lpfc_hba   *phba = ((struct lpfc_vport *) shost->hostdata)->phba;
struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba);
 
-   return snprintf(buf, PAGE_SIZE, "%d\n", pring->txcmplq_max);
+   return snprintf(buf, PAGE_SIZE, "%d\n",
+   pring ? pring->txcmplq_max : 0);
 }
 
 static DEVICE_ATTR(txcmplq_hw, S_IRUGO,
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index fe9e1c079c20..d89816222b23 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2911,7 +2911,7 @@ static int lpfcdiag_loop_post_rxbufs(struct lpfc_hba 
*phba, uint16_t rxxri,
}
}
 
-   if (!cmdiocbq || !rxbmp || !rxbpl || !rxbuffer) {
+   if (!cmdiocbq || !rxbmp || !rxbpl || !rxbuffer || !pring) {
ret_val = -ENOMEM;
goto err_post_rxbufs_exit;
}
@@ -5421,6 +5421,8 @@ lpfc_bsg_timeout(struct bsg_job *job)
struct lpfc_iocbq *check_iocb, *next_iocb;
 
pring = lpfc_phba_elsring(phba);
+   if (unlikely(!pring))
+   return -EIO;
 
/* if job's driver data is NULL, the command completed or is in the
 * the process of completing.  In this case, return status to request
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 468a66371de9..3ebf6ccba6e6 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -7430,6 +7430,8 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport)
timeout = (uint32_t)(phba->fc_ratov << 1);
 
pring = lpfc_phba_elsring(phba);
+   if (unlikely(!pring))
+   return;
 
if ((phba->pport->load_flag & FC_UNLOADING))
return;
@@ -9310,6 +9312,9 @@ void lpfc_fabric_abort_nport(struct lpfc_nodelist *ndlp)
 
pring = lpfc_phba_elsring(phba);
 
+   if (unlikely(!pring))
+   return;
+
spin_lock_irq(&phba->hbalock);
list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list,
 list) {
@@ -9416,7 +9421,7 @@ lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba,
rxid, 1);
 
/* Check if TXQ queue needs to be serviced */
-   if (!(list_empty(&pring->txq)))
+   if (pring && !list_empty(&pring->txq))
lpfc_worker_wake_up(phba);
return;
}
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 20808349a80e..499df9d17339 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -3324,7 +3324,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, 
LPFC_MBOXQ_t *pmb)
 
/* Unblock ELS traffic */
pring = lpfc_phba_elsring(phba);
-   pring->flag &= ~LPFC_STOP_IOCB_EVENT;
+   if (pring)
+   pring->flag &= ~LPFC_STOP_IOCB_EVENT;
 
/* Check for error */
if (mb->mbxStatus) {
@@ -5430,6 +5431,8 @@ lpfc_free_tx(struct lpfc_hba *phba, struct lpfc_nodelist 
*ndlp)
 
psli = &phba->sli;
pring = lpfc_phba_elsring(phba);
+   if (unlikely(!pring))
+   return;
 
 

[PATCH v2 07/21] lpfc: Make ktime sampling more accurate

2017-09-29 Thread James Smart
From: Dick Kennedy 

Make ktime samples more accurate.

If ktime is turned on in the middle of an IO, the max calculation
could be misleading. Base sampling on the start time of the IO
as opposed to ktime_on.

Take the ISR ktime timestamp when the CQE is read instead of the EQE.
Add additional sanity checks when deciding whether to accept an
IO sample.
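
A simplified userspace sketch of the segment accounting described above
follows; it is illustrative only, with hypothetical field names and
cruder checks than the actual patch:

#include <stdint.h>
#include <stdio.h>

struct io_ts {
        uint64_t cmd_start;     /* IO handed to the driver */
        uint64_t cmd_wqput;     /* WQE put to the hardware */
        uint64_t isr_cmpl;      /* completion seen in the ISR */
        uint64_t data_done;     /* completion handed to the NVME layer */
};

/* Derive every segment from the IO start time; discard bad samples. */
static int io_segments(const struct io_ts *ts, uint64_t seg[3])
{
        if (!ts->cmd_start || !ts->cmd_wqput || !ts->isr_cmpl || !ts->data_done)
                return -1;                      /* incomplete sample */
        if (ts->cmd_wqput < ts->cmd_start ||
            ts->isr_cmpl < ts->cmd_wqput ||
            ts->data_done < ts->isr_cmpl)
                return -1;                      /* non-monotonic, discard */

        seg[0] = ts->cmd_wqput - ts->cmd_start; /* driver submit time */
        seg[1] = ts->isr_cmpl - ts->cmd_wqput;  /* hardware round trip */
        seg[2] = ts->data_done - ts->isr_cmpl;  /* completion handling */
        return 0;
}

int main(void)
{
        struct io_ts ts = { 100, 140, 900, 960 };
        uint64_t seg[3];

        if (!io_segments(&ts, seg))
                printf("%llu %llu %llu\n",
                       (unsigned long long)seg[0],
                       (unsigned long long)seg[1],
                       (unsigned long long)seg[2]);
        return 0;
}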

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_nvme.c  | 34 ++-
 drivers/scsi/lpfc/lpfc_nvmet.c | 99 --
 drivers/scsi/lpfc/lpfc_sli.c   | 34 +--
 3 files changed, 109 insertions(+), 58 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 2ad23b356bfe..a83b57254de6 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -667,15 +667,17 @@ lpfc_nvme_ktime(struct lpfc_hba *phba,
struct lpfc_nvme_buf *lpfc_ncmd)
 {
uint64_t seg1, seg2, seg3, seg4;
+   uint64_t segsum;
 
-   if (!phba->ktime_on)
-   return;
if (!lpfc_ncmd->ts_last_cmd ||
!lpfc_ncmd->ts_cmd_start ||
!lpfc_ncmd->ts_cmd_wqput ||
!lpfc_ncmd->ts_isr_cmpl ||
!lpfc_ncmd->ts_data_nvme)
return;
+
+   if (lpfc_ncmd->ts_data_nvme < lpfc_ncmd->ts_cmd_start)
+   return;
if (lpfc_ncmd->ts_cmd_start < lpfc_ncmd->ts_last_cmd)
return;
if (lpfc_ncmd->ts_cmd_wqput < lpfc_ncmd->ts_cmd_start)
@@ -695,15 +697,23 @@ lpfc_nvme_ktime(struct lpfc_hba *phba,
 * cmpl is handled off to the NVME Layer.
 */
seg1 = lpfc_ncmd->ts_cmd_start - lpfc_ncmd->ts_last_cmd;
-   if (seg1 > 500)  /* 5 ms - for sequential IOs */
-   return;
+   if (seg1 > 500)  /* 5 ms - for sequential IOs only */
+   seg1 = 0;
 
/* Calculate times relative to start of IO */
seg2 = (lpfc_ncmd->ts_cmd_wqput - lpfc_ncmd->ts_cmd_start);
-   seg3 = (lpfc_ncmd->ts_isr_cmpl -
-   lpfc_ncmd->ts_cmd_start) - seg2;
-   seg4 = (lpfc_ncmd->ts_data_nvme -
-   lpfc_ncmd->ts_cmd_start) - seg2 - seg3;
+   segsum = seg2;
+   seg3 = lpfc_ncmd->ts_isr_cmpl - lpfc_ncmd->ts_cmd_start;
+   if (segsum > seg3)
+   return;
+   seg3 -= segsum;
+   segsum += seg3;
+
+   seg4 = lpfc_ncmd->ts_data_nvme - lpfc_ncmd->ts_cmd_start;
+   if (segsum > seg4)
+   return;
+   seg4 -= segsum;
+
phba->ktime_data_samples++;
phba->ktime_seg1_total += seg1;
if (seg1 < phba->ktime_seg1_min)
@@ -902,7 +912,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *pwqeIn,
 * owns the dma address.
 */
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-   if (phba->ktime_on) {
+   if (lpfc_ncmd->ts_cmd_start) {
lpfc_ncmd->ts_isr_cmpl = pwqeIn->isr_timestamp;
lpfc_ncmd->ts_data_nvme = ktime_get_ns();
phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme;
@@ -1283,9 +1293,11 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port 
*pnvme_lport,
goto out_fail;
}
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-   if (phba->ktime_on) {
+   if (start) {
lpfc_ncmd->ts_cmd_start = start;
lpfc_ncmd->ts_last_cmd = phba->ktime_last_cmd;
+   } else {
+   lpfc_ncmd->ts_cmd_start = 0;
}
 #endif
 
@@ -1336,7 +1348,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port 
*pnvme_lport,
}
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-   if (phba->ktime_on)
+   if (lpfc_ncmd->ts_cmd_start)
lpfc_ncmd->ts_cmd_wqput = ktime_get_ns();
 
if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) {
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index b6cd213567ac..cee88f3c0779 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -221,9 +221,8 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct 
lpfc_nvmet_ctxbuf *ctx_buf)
spin_lock_init(&ctxp->ctxlock);
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-   if (phba->ktime_on) {
+   if (ctxp->ts_cmd_nvme) {
ctxp->ts_cmd_nvme = ktime_get_ns();
-   ctxp->ts_isr_cmd = ctxp->ts_cmd_nvme;
ctxp->ts_nvme_data = 0;
ctxp->ts_data_wqput = 0;
ctxp->ts_isr_data = 0;
@@ -289,9 +288,7 @@ lpfc_nvmet_ktime(struct lpfc_hba *phba,
 {
uint64_t seg1, seg2, seg3, seg4, seg5;
uint64_t seg6, seg7, seg8, seg9, seg10;
-
-   if (!phba->ktime_on)
-   return;
+   uint64_t segsum;
 
if (!ctxp->ts_isr_cmd || !ctxp->ts_cmd_nvme ||
!ctxp->ts_nvme_data || !ctxp->ts_data_wqput ||
@@ -300,6 +297,8 @@ lpfc_nvmet_ktime(struct lpfc_hba *phba,
!ctxp->ts_isr_status || !

Re: [Patch v2 1/2] libiscsi: Fix use-after-free race during iscsi_session_teardown

2017-09-29 Thread Khazhismel Kumykov
Noticed these don't seem to be in 4.14/scsi-queue

On Tue, Aug 29, 2017 at 6:45 PM, Martin K. Petersen
 wrote:
>
> Chris,
>
>> Looks good to me, fixes up the code given that the comment there about
>> calling iscsi_remove_session wasn't being followed.
>
> Applied these two to 4.14/scsi-queue.
>
> --
> Martin K. Petersen  Oracle Linux Engineering


smime.p7s
Description: S/MIME Cryptographic Signature


Re: [PATCH V6 0/6] block/scsi: safe SCSI quiescing

2017-09-29 Thread Martin Steigerwald
Ming Lei - 27.09.17, 16:27:
> On Wed, Sep 27, 2017 at 09:57:37AM +0200, Martin Steigerwald wrote:
> > Hi Ming.
> > 
> > Ming Lei - 27.09.17, 13:48:
> > > Hi,
> > > 
> > > The current SCSI quiesce isn't safe and easy to trigger I/O deadlock.
> > > 
> > > Once SCSI device is put into QUIESCE, no new request except for
> > > RQF_PREEMPT can be dispatched to SCSI successfully, and
> > > scsi_device_quiesce() just simply waits for completion of I/Os
> > > dispatched to SCSI stack. It isn't enough at all.
> > > 
> > > Because new requests can still be coming, but all the allocated
> > > requests can't be dispatched successfully, so request pool can be
> > > consumed up easily.
> > > 
> > > Then request with RQF_PREEMPT can't be allocated and wait forever,
> > > meantime scsi_device_resume() waits for completion of RQF_PREEMPT,
> > > then system hangs forever, such as during system suspend or
> > > sending SCSI domain validation.
> > > 
> > > Both IO hang inside system suspend[1] or SCSI domain validation
> > > were reported before.
> > > 
> > > This patch introduces preempt only mode, and solves the issue
> > > by allowing RQF_PREEMP only during SCSI quiesce.
> > > 
> > > Both SCSI and SCSI_MQ have this IO deadlock issue, this patch fixes
> > > them all.
> > > 
> > > V6:
> > >   - borrow Bart's idea of preempt only, with clean
> > >   
> > > implementation(patch 5/patch 6)
> > >   
> > >   - needn't any external driver's dependency, such as MD's
> > >   change
> > 
> > Do you want me to test with v6 of the patch set? If so, it would be nice
> > if
> > you´d make a v6 branch in your git repo.
> 
> Hi Martin,
> 
> I appreciate much if you may run V6 and provide your test result,
> follows the branch:
> 
> https://github.com/ming1/linux/tree/blk_safe_scsi_quiesce_V6
> 
> https://github.com/ming1/linux.git #blk_safe_scsi_quiesce_V6
> 
> > After an uptime of almost 6 days I am pretty confident that the V5 one
> > fixes the issue for me. So
> > 
> > Tested-by: Martin Steigerwald 
> > 
> > for V5.
> 
> Thanks for your test!

Two days and almost 6 hours, no hang yet. I bet the whole thing works. 
(3e45474d7df3bfdabe4801b5638d197df9810a79)

Tested-By: Martin Steigerwald 

(It could still hang after three days, but usually I got the first hang within 
the first two days.)

Thanks,
-- 
Martin
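
A minimal userspace model of the "preempt only" gate described in the
quoted series may help readers follow the discussion; the names below
are hypothetical, not the block layer API:

#include <stdbool.h>
#include <stdio.h>

#define RQF_PREEMPT     (1u << 0)       /* stand-in for the real flag */

struct queue {
        bool preempt_only;              /* set for the duration of a quiesce */
};

/* Only preempt requests may be admitted while the queue is quiescing. */
static bool may_allocate_request(const struct queue *q, unsigned int rq_flags)
{
        if (q->preempt_only && !(rq_flags & RQF_PREEMPT))
                return false;           /* would otherwise exhaust the pool */
        return true;
}

int main(void)
{
        struct queue q = { .preempt_only = true };

        printf("normal IO admitted: %d\n", may_allocate_request(&q, 0));
        printf("preempt IO admitted: %d\n", may_allocate_request(&q, RQF_PREEMPT));
        return 0;
}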


Re: [PATCH] scsi: sd: Implement blacklist option for WRITE SAME w/ UNMAP

2017-09-29 Thread Laurence Oberman
On Fri, 2017-09-29 at 09:21 -0400, Martin K. Petersen wrote:
> Laurence,
> 
> > I am testing this but its not being picked up so I want to know if
> > I
> > have the kernel command line wrong here.
> > 
> > scsi_dev_flags=LIO-ORG:thin2:0x8000
> > 
> > What am I doing wrong to pass the BLIST flags.
> 
> This worked for me:
> 
> [root@kvm ~]# echo "Linux:scsi_debug:0x8000" >
> /proc/scsi/device_info
> [root@kvm ~]# grep Linux /proc/scsi/device_info 
> 'Linux   ' 'scsi_debug  ' 0x8000
> [root@kvm ~]# modprobe scsi_debug unmap_max_blocks=10
> unmap_max_desc=1 write_same_length=20 lbpws=1
> [root@kvm ~]# lsblk -D
> NAME DISC-ALN DISC-GRAN DISC-MAX DISC-ZERO
> sda 0  512B   5K 0
> 
> (With the caveat that I tweaked scsi_debug to report the UNMAP
> parameters despite lbpu being 0).
> 

OK, thanks, that is working now and I pick up the correct size.
It's going to be very useful for these corner-case array
inconsistencies.

Tested-by: Laurence Oberman 

Sep 29 09:56:11 localhost kernel: scsi 1:0:0:50: Direct-Access LIO-
ORG  thin24.0  PQ: 0 ANSI: 5
Sep 29 09:56:11 localhost kernel: scsi 1:0:0:50: alua: supports
implicit and explicit TPGS
Sep 29 09:56:11 localhost kernel: scsi 1:0:0:50: alua: device
naa.6001405f7aa27ca453f4381a00f22ea6 port group 0 rel port 2
Sep 29 09:56:11 localhost kernel: sd 1:0:0:50: Attached scsi generic
sg64 type 0
Sep 29 09:56:11 localhost kernel: RHDEBUG: unmap_limit_for_ws set by
kernel flag for case SD_LBP_WS16
Sep 29 09:56:11 localhost kernel: sd 1:0:0:50: [sdbl] 8192 512-byte 
logical blocks: (41.9 GB/39.1 GiB)
Sep 29 09:56:11 localhost kernel: sd 1:0:0:50: [sdbl] Write Protect is
off
Sep 29 09:56:11 localhost kernel: sd 1:0:0:50: [sdbl] Write cache:
enabled, read cache: enabled, supports DPO and FUA
Sep 29 09:56:11 localhost kernel: sd 1:0:0:50: alua: transition timeout
set to 60 seconds


RE: [PATCH] scsi: sd: Do not override max_sectors_kb sysfs setting

2017-09-29 Thread Don Brace
> -Original Message-
> From: linux-scsi-ow...@vger.kernel.org [mailto:linux-scsi-
> ow...@vger.kernel.org] On Behalf Of Martin K. Petersen
> Sent: Wednesday, September 27, 2017 8:39 PM
> To: linux-scsi@vger.kernel.org
> Cc: Martin K. Petersen 
> Subject: [PATCH] scsi: sd: Do not override max_sectors_kb sysfs setting
> 
> EXTERNAL EMAIL
> 
> 
> A user may lower the max_sectors_kb setting in sysfs to accommodate
> certain workloads. Previously we would always set the max I/O size to
> either the block layer default or the optional preferred I/O size
> reported by the device.
> 
> Keep the current heuristics for the initial setting of max_sectors_kb.
> For subsequent invocations, only update the current queue limit if it
> exceeds the capabilities of the hardware.
> 
> Reported-by: Don Brace 
> Signed-off-by: Martin K. Petersen 

Tested-by: Don Brace 

Really appreciate your attention to this matter.

Thanks,
Don Brace
ESC - Smart Storage
Microsemi Corporation





Re: [PATCH] scsi: fix the issue that iscsi_if_rx doesn't parse nlmsg properly

2017-09-29 Thread Ewan D. Milne
On Mon, 2017-09-25 at 15:28 -0400, Martin K. Petersen wrote:
> Xin,
> 
> > ChunYu found a kernel crash by syzkaller:
> 
> [...]
> 
> > It's caused by skb_shared_info at the end of sk_buff was overwritten by
> > ISCSI_KEVENT_IF_ERROR when parsing nlmsg info from skb in iscsi_if_rx.
> >
> > During the loop if skb->len == nlh->nlmsg_len and both are sizeof(*nlh),
> > ev = nlmsg_data(nlh) will acutally get skb_shinfo(SKB) instead and set a
> > new value to skb_shinfo(SKB)->nr_frags by ev->type.
> >
> > This patch is to fix it by checking nlh->nlmsg_len properly there to
> > avoid over accessing sk_buff.
> 
> Applied to 4.14/scsi-fixes. Thank you!
> 

Should this be considered for -stable?  (Despite not being reproduced
after 7f564528a4).
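
For context on the fix being asked about, a minimal userspace sketch of
the kind of nlmsg length validation involved is shown below; the event
struct is a stand-in, not the iSCSI definition:

#include <linux/netlink.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct iscsi_event_stub {               /* stand-in for the real event payload */
        unsigned int type;
        unsigned int data[7];
};

/*
 * The header must claim room for itself plus the event payload, and it
 * must not claim more than the buffer actually holds, before
 * NLMSG_DATA() is dereferenced.
 */
static bool nlmsg_sane(const struct nlmsghdr *nlh, size_t buf_len)
{
        if (buf_len < (size_t)NLMSG_HDRLEN)
                return false;
        if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct iscsi_event_stub)))
                return false;           /* too short to carry an event */
        if (nlh->nlmsg_len > buf_len)
                return false;           /* header overstates its own size */
        return true;
}

int main(void)
{
        /* header-only message: the case the crash report describes */
        struct nlmsghdr nlh = { .nlmsg_len = sizeof(nlh) };

        printf("accepted: %d\n", nlmsg_sane(&nlh, sizeof(nlh)));
        return 0;
}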




Re: [PATCH] scsi: sd: Implement blacklist option for WRITE SAME w/ UNMAP

2017-09-29 Thread Martin K. Petersen

Laurence,

> I am testing this but its not being picked up so I want to know if I
> have the kernel command line wrong here.
>
> scsi_dev_flags=LIO-ORG:thin2:0x8000
>
> What am I doing wrong to pass the BLIST flags.

This worked for me:

[root@kvm ~]# echo "Linux:scsi_debug:0x8000" > /proc/scsi/device_info
[root@kvm ~]# grep Linux /proc/scsi/device_info 
'Linux   ' 'scsi_debug  ' 0x8000
[root@kvm ~]# modprobe scsi_debug unmap_max_blocks=10 unmap_max_desc=1 
write_same_length=20 lbpws=1
[root@kvm ~]# lsblk -D
NAME DISC-ALN DISC-GRAN DISC-MAX DISC-ZERO
sda 0  512B   5K 0

(With the caveat that I tweaked scsi_debug to report the UNMAP
parameters despite lbpu being 0).

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH] scsi: sd: Do not override max_sectors_kb sysfs setting

2017-09-29 Thread Martin Wilck
On Wed, 2017-09-27 at 21:38 -0400, Martin K. Petersen wrote:
> A user may lower the max_sectors_kb setting in sysfs to accommodate
> certain workloads. Previously we would always set the max I/O size to
> either the block layer default or the optional preferred I/O size
> reported by the device.
> 
> Keep the current heuristics for the initial setting of
> max_sectors_kb.
> For subsequent invocations, only update the current queue limit if it
> exceeds the capabilities of the hardware.
> 
> Reported-by: Don Brace 
> Signed-off-by: Martin K. Petersen 
 
This looks good to me. I agree that it's superior to the original
suggestion, because it sets the soft limit to the hard limit when the
device is scanned for the first time.
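
A small illustrative sketch of the rule under discussion follows; it is
not the sd.c code, just the shape of it:

#include <stdio.h>

/*
 * First scan: apply the usual heuristic, capped by the hardware limit.
 * Revalidate: keep the current (possibly user-lowered) value unless it
 * exceeds what the hardware allows.
 */
static unsigned int update_max_sectors(unsigned int current_max,
                                       unsigned int hw_max,
                                       unsigned int heuristic,
                                       int first_scan)
{
        if (first_scan)
                return heuristic < hw_max ? heuristic : hw_max;
        return current_max > hw_max ? hw_max : current_max;
}

int main(void)
{
        /* the user lowered the limit to 128 sectors; a rescan must keep it */
        printf("%u\n", update_max_sectors(128, 2048, 1280, 0)); /* 128 */
        printf("%u\n", update_max_sectors(0, 2048, 1280, 1));   /* 1280 */
        return 0;
}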

Regards
Martin

-- 
Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)



Re: [PATCH 3/3] smartpqi: update driver version to 1.1.2-126

2017-09-29 Thread Tomas Henzl
On 27.9.2017 23:30, Don Brace wrote:
> Reviewed-by: Gerry Morong 
> Reviewed-by: Scott Benesh 
> Reviewed-by: Scott Teel 
> Signed-off-by: Kevin Barnett 
> Signed-off-by: Don Brace 

Reviewed-by: Tomas Henzl 



Re: [PATCH 2/3] smartpqi: cleanup raid map warning message

2017-09-29 Thread Tomas Henzl
On 27.9.2017 23:30, Don Brace wrote:
> From: Kevin Barnett 
>
> Fix a small cosmetic bug in a very rarely encountered
> error message that can occur when a LD has a corrupted
> raid map.
>
> Reviewed-by: Scott Benesh 
> Signed-off-by: Kevin Barnett 
> Signed-off-by: Don Brace 

Reviewed-by: Tomas Henzl 



Re: [PATCH 1/3] smartpqi: update controller ids

2017-09-29 Thread Tomas Henzl
On 27.9.2017 23:29, Don Brace wrote:
> From: Kevin Barnett 
>
> Update the driver’s PCI IDs
>
> Reviewed-by: Scott Benesh 
> Reviewed-by: Scott Teel 
> Signed-off-by: Kevin Barnett 
> Signed-off-by: Don Brace 

Reviewed-by: Tomas Henzl 



Re: [PATCH] scsi: sd: Implement blacklist option for WRITE SAME w/ UNMAP

2017-09-29 Thread Laurence Oberman
On Wed, 2017-09-27 at 21:35 -0400, Martin K. Petersen wrote:
> SBC-4 states:
> 
>   "A MAXIMUM UNMAP LBA COUNT field set to a non-zero value indicates
> the
>    maximum number of LBAs that may be unmapped by an UNMAP command"
> 
>   "A MAXIMUM WRITE SAME LENGTH field set to a non-zero value
> indicates
>    the maximum number of contiguous logical blocks that the device
> server
>    allows to be unmapped or written in a single WRITE SAME command."
> 
> Despite the spec being clear on the topic, some devices incorrectly
> expect WRITE SAME commands with the UNMAP bit set to be limited to
> the
> value reported in MAXIMUM UNMAP LBA COUNT in the Block Limits VPD.
> 
> Implement a blacklist option that can be used to accommodate devices
> with this behavior.
> 
> Reported-by: Bill Kuzeja 
> Reported-by: Ewan D. Milne 
> Signed-off-by: Martin K. Petersen 
> ---
>  drivers/scsi/scsi_scan.c|  3 +++
>  drivers/scsi/sd.c   | 16 
>  include/scsi/scsi_device.h  |  1 +
>  include/scsi/scsi_devinfo.h |  1 +
>  4 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index e7818afeda2b..15590a063ad9 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -956,6 +956,9 @@ static int scsi_add_lun(struct scsi_device *sdev,
> unsigned char *inq_result,
>   if (*bflags & BLIST_NO_DIF)
>   sdev->no_dif = 1;
>  
> + if (*bflags & BLIST_UNMAP_LIMIT_WS)
> + sdev->unmap_limit_for_ws = 1;
> +
>   sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT;
>  
>   if (*bflags & BLIST_TRY_VPD_PAGES)
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index b18ba3235900..347be7580181 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -715,13 +715,21 @@ static void sd_config_discard(struct scsi_disk
> *sdkp, unsigned int mode)
>   break;
>  
>   case SD_LBP_WS16:
> - max_blocks = min_not_zero(sdkp->max_ws_blocks,
> -   (u32)SD_MAX_WS16_BLOCKS);
> + if (sdkp->device->unmap_limit_for_ws)
> + max_blocks = sdkp->max_unmap_blocks;
> + else
> + max_blocks = sdkp->max_ws_blocks;
> +
> + max_blocks = min_not_zero(max_blocks,
> (u32)SD_MAX_WS16_BLOCKS);
>   break;
>  
>   case SD_LBP_WS10:
> - max_blocks = min_not_zero(sdkp->max_ws_blocks,
> -   (u32)SD_MAX_WS10_BLOCKS);
> + if (sdkp->device->unmap_limit_for_ws)
> + max_blocks = sdkp->max_unmap_blocks;
> + else
> + max_blocks = sdkp->max_ws_blocks;
> +
> + max_blocks = min_not_zero(max_blocks,
> (u32)SD_MAX_WS10_BLOCKS);
>   break;
>  
>   case SD_LBP_ZERO:
> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> index 82e93ee94708..67c5a9f223f7 100644
> --- a/include/scsi/scsi_device.h
> +++ b/include/scsi/scsi_device.h
> @@ -192,6 +192,7 @@ struct scsi_device {
>   unsigned no_dif:1;  /* T10 PI (DIF) should be disabled
> */
>   unsigned broken_fua:1;  /* Don't set FUA bit
> */
>   unsigned lun_in_cdb:1;  /* Store LUN bits in
> CDB[1] */
> + unsigned unmap_limit_for_ws:1;  /* Use the UNMAP limit
> for WRITE SAME */
>  
>   atomic_t disk_events_disable_depth; /* disable depth for
> disk events */
>  
> diff --git a/include/scsi/scsi_devinfo.h
> b/include/scsi/scsi_devinfo.h
> index 9592570e092a..36b03013d629 100644
> --- a/include/scsi/scsi_devinfo.h
> +++ b/include/scsi/scsi_devinfo.h
> @@ -29,5 +29,6 @@
>  #define BLIST_TRY_VPD_PAGES  0x1000 /* Attempt to read VPD
> pages */
>  #define BLIST_NO_RSOC0x2000 /* don't try to
> issue RSOC */
>  #define BLIST_MAX_1024   0x4000 /* maximum 1024
> sector cdb length */
> +#define BLIST_UNMAP_LIMIT_WS 0x8000 /* Use UNMAP limit
> for WRITE SAME */
>  
>  #endif

Hello Martin
I am testing this but it's not being picked up, so I want to know if I
have the kernel command line wrong here.

scsi_dev_flags=LIO-ORG:thin2:0x8000

Device is here
[   16.853083] scsi 4:0:0:50: Direct-Access LIO-
ORG  thin24.0  PQ: 0 ANSI: 5

I have a couple of printk's in now to see if I see the flags, and they
don't trigger:

        case SD_LBP_WS16:
                if (sdkp->device->unmap_limit_for_ws) {
                        max_blocks = sdkp->max_unmap_blocks;
                        printk("RHDEBUG: unmap_limit_for_ws set by kernel flag for case SD_LBP_WS16\n");
                } else
                        max_blocks = sdkp->max_ws_blocks;

                max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS16_BLOCKS);
                break;

        case SD_LBP_WS10:
                if (sdkp->device->unmap_limit_for_ws) {
                        max_blocks = sdkp->max_unmap_blocks;