[PATCH 3/8] lpfc: Fix list corruption on the completion queue.

2018-07-31 Thread James Smart
Enabling list_debug showed that the driver's txcmplq was suffering list
corruption. The systems will eventually crash because the
iocb free list gets cross-linked with the pring's txcmplq.
Most systems will run for a while after the corruption, but will
eventually crash when a SCSI EH reset occurs and the driver attempts
to flush the txcmplq. The flush gets stuck in an endless loop.

The problem is that the abort handler does not hold the sli4 ring lock
while validating the IO, so the IO could complete while the driver is still
prepping the abort. The erroneously generated abort, when it completes,
has pointers to the original IO that has already completed, and the
IO manipulation (for the second time) corrupts the list.

Correct by taking the ring lock early in the abort handler so the
erroneous abort won't be sent if the IO has completed or is completing.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_scsi.c | 44 +++
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 25ef2b6ccc34..51ce244cf10a 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -4727,7 +4727,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
struct lpfc_scsi_buf *lpfc_cmd;
IOCB_t *cmd, *icmd;
int ret = SUCCESS, status = 0;
-   struct lpfc_sli_ring *pring_s4;
+   struct lpfc_sli_ring *pring_s4 = NULL;
int ret_val;
unsigned long flags;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
@@ -4757,8 +4757,25 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
}
 
iocb = _cmd->cur_iocbq;
+   if (phba->sli_rev == LPFC_SLI_REV4) {
+   if (!(phba->cfg_fof) ||
+   (!(iocb->iocb_flag & LPFC_IO_FOF))) {
+   pring_s4 =
+   phba->sli4_hba.fcp_wq[iocb->hba_wqidx]->pring;
+   } else {
+   iocb->hba_wqidx = 0;
+   pring_s4 = phba->sli4_hba.oas_wq->pring;
+   }
+   if (!pring_s4) {
+   ret = FAILED;
+   goto out_unlock;
+   }
+   spin_lock(_s4->ring_lock);
+   }
/* the command is in process of being cancelled */
if (!(iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
+   if (phba->sli_rev == LPFC_SLI_REV4)
+   spin_unlock(_s4->ring_lock);
spin_unlock_irqrestore(>hbalock, flags);
lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
"3169 SCSI Layer abort requested I/O has been "
@@ -4772,6 +4789,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 * see the completion before the eh fired. Just return SUCCESS.
 */
if (lpfc_cmd->pCmd != cmnd) {
+   if (phba->sli_rev == LPFC_SLI_REV4)
+   spin_unlock(_s4->ring_lock);
lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
"3170 SCSI Layer abort requested I/O has been "
"completed by LLD.\n");
@@ -4784,6 +4803,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
if (iocb->iocb_flag & LPFC_DRIVER_ABORTED) {
lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
 "3389 SCSI Layer I/O Abort Request is pending\n");
+   if (phba->sli_rev == LPFC_SLI_REV4)
+   spin_unlock(_s4->ring_lock);
spin_unlock_irqrestore(>hbalock, flags);
goto wait_for_cmpl;
}
@@ -4791,6 +4812,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
abtsiocb = __lpfc_sli_get_iocbq(phba);
if (abtsiocb == NULL) {
ret = FAILED;
+   if (phba->sli_rev == LPFC_SLI_REV4)
+   spin_unlock(_s4->ring_lock);
goto out_unlock;
}
 
@@ -4828,14 +4851,9 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 
abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
abtsiocb->vport = vport;
+   lpfc_cmd->waitq = 
if (phba->sli_rev == LPFC_SLI_REV4) {
-   pring_s4 = lpfc_sli4_calc_ring(phba, abtsiocb);
-   if (pring_s4 == NULL) {
-   ret = FAILED;
-   goto out_unlock;
-   }
/* Note: both hbalock and ring_lock must be set here */
-   spin_lock(_s4->ring_lock);
ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
abtsiocb, 0);
spin_unlock(_s4->ring_lock);
@@ -4848,6 +4866,17 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 
 
if (ret_val == IOCB_ERROR) {
+   if (phba->sli_rev == LPFC_SLI_REV4)
+   spin_lock_irqsave(_s4->ring_lock, flags);
+   else
+   spin_lock_irqsave(>hbalock, flags);
+

[PATCH 7/8] lpfc: Remove lpfc_enable_pbde as module parameter

2018-07-31 Thread James Smart
Enablement of the PBDE optimization brought out some
incompatible behaviors under error scenarios.

Best to disable and remove the PBDE optimization.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_attr.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 514513f837a8..5a25553415f8 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5387,14 +5387,6 @@ LPFC_BBCR_ATTR_RW(enable_bbcr, 1, 0, 1, "Enable BBC 
Recovery");
  */
 LPFC_ATTR_RW(enable_dpp, 1, 0, 1, "Enable Direct Packet Push");
 
-/*
- * lpfc_enable_pbde: Enable PBDE on PRISM - G7
- *   0  = PBDE on G7 disabled
- *   1  = PBDE on G7 enabled (default)
- * Value range is [0,1]. Default value is 1
- */
-LPFC_ATTR_R(enable_pbde, 1, 0, 1, "Enable PBDE support on PRISM");
-
 struct device_attribute *lpfc_hba_attrs[] = {
_attr_nvme_info,
_attr_bg_info,
@@ -5506,7 +5498,6 @@ struct device_attribute *lpfc_hba_attrs[] = {
_attr_lpfc_enable_mds_diags,
_attr_lpfc_enable_bbcr,
_attr_lpfc_enable_dpp,
-   _attr_lpfc_enable_pbde,
NULL,
 };
 
@@ -6541,7 +6532,6 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel);
lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr);
lpfc_enable_dpp_init(phba, lpfc_enable_dpp);
-   lpfc_enable_pbde_init(phba, lpfc_enable_pbde);
 
if (phba->sli_rev != LPFC_SLI_REV4) {
/* NVME only supported on SLI4 */
@@ -6558,6 +6548,8 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
phba->cfg_auto_imax = 0;
phba->initial_imax = phba->cfg_fcp_imax;
 
+   phba->cfg_enable_pbde = 0;
+
/* A value of 0 means use the number of CPUs found in the system */
if (phba->cfg_fcp_io_channel == 0)
phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu;
-- 
2.13.1



[PATCH 8/8] lpfc: update driver version to 12.0.0.6

2018-07-31 Thread James Smart
Update the driver version to 12.0.0.6

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index a669af45fce7..501249509af4 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package. *
  ***/
 
-#define LPFC_DRIVER_VERSION "12.0.0.5"
+#define LPFC_DRIVER_VERSION "12.0.0.6"
 #define LPFC_DRIVER_NAME   "lpfc"
 
 /* Used for SLI 2/3 */
-- 
2.13.1



[PATCH 2/8] lpfc: Fix sysfs Speed value on CNA ports

2018-07-31 Thread James Smart
CNA ports were showing speed as "unknown" even if the link
is up.

Add speed decoding for FCOE-based adapters

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_attr.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 23a5a298d60e..514513f837a8 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5927,6 +5927,24 @@ lpfc_get_host_speed(struct Scsi_Host *shost)
fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
break;
}
+   } else if (lpfc_is_link_up(phba) && (phba->hba_flag & HBA_FCOE_MODE)) {
+   switch (phba->fc_linkspeed) {
+   case LPFC_ASYNC_LINK_SPEED_10GBPS:
+   fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
+   break;
+   case LPFC_ASYNC_LINK_SPEED_25GBPS:
+   fc_host_speed(shost) = FC_PORTSPEED_25GBIT;
+   break;
+   case LPFC_ASYNC_LINK_SPEED_40GBPS:
+   fc_host_speed(shost) = FC_PORTSPEED_40GBIT;
+   break;
+   case LPFC_ASYNC_LINK_SPEED_100GBPS:
+   fc_host_speed(shost) = FC_PORTSPEED_100GBIT;
+   break;
+   default:
+   fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
+   break;
+   }
} else
fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
 
-- 
2.13.1



[PATCH 6/8] lpfc: Correct LCB ACCept payload

2018-07-31 Thread James Smart
After memory allocation for the LCB response frame, the memory
wasn't zero initialized, and not all fields are set. Thus garbage
shows up in the payload.

Fix by zeroing the memory at allocation.
Also properly set the Capability field based on duration support.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_els.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index df655f1284d0..4dda969e947c 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -5662,6 +5662,7 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
lcb_res = (struct fc_lcb_res_frame *)
(((struct lpfc_dmabuf *)elsiocb->context2)->virt);
 
+   memset(lcb_res, 0, sizeof(struct fc_lcb_res_frame));
icmd = >iocb;
icmd->ulpContext = lcb_context->rx_id;
icmd->unsli3.rcvsli3.ox_id = lcb_context->ox_id;
@@ -5670,6 +5671,7 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
*((uint32_t *)(pcmd)) = ELS_CMD_ACC;
lcb_res->lcb_sub_command = lcb_context->sub_command;
lcb_res->lcb_type = lcb_context->type;
+   lcb_res->capability = lcb_context->capability;
lcb_res->lcb_frequency = lcb_context->frequency;
lcb_res->lcb_duration = lcb_context->duration;
elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
-- 
2.13.1



[PATCH 0/8] lpfc updates for 12.0.0.6

2018-07-31 Thread James Smart
This patch set contains lpfc bug fixes.

The patches were cut against Martin's 4.19/scsi-queue tree.

James Smart (8):
  lpfc: Fix ELS abort on SLI-3 adapters
  lpfc: Fix sysfs Speed value on CNA ports
  lpfc: Fix list corruption on the completion queue.
  lpfc: Fix driver crash when re-registering NVME rports.
  lpfc: Limit tracking of tgt queue depth in fast path
  lpfc: Correct LCB ACCept payload
  lpfc: Remove lpfc_enable_pbde as module parameter
  lpfc: update driver version to 12.0.0.6

 drivers/scsi/lpfc/lpfc_attr.c  |  30 ++
 drivers/scsi/lpfc/lpfc_disc.h  |   3 +
 drivers/scsi/lpfc/lpfc_els.c   |   2 +
 drivers/scsi/lpfc/lpfc_nportdisc.c |   9 +--
 drivers/scsi/lpfc/lpfc_nvme.c  |  63 
 drivers/scsi/lpfc/lpfc_nvme.h  |   1 +
 drivers/scsi/lpfc/lpfc_scsi.c  | 118 +
 drivers/scsi/lpfc/lpfc_scsi.h  |   2 +
 drivers/scsi/lpfc/lpfc_sli.c   |  16 -
 drivers/scsi/lpfc/lpfc_version.h   |   2 +-
 10 files changed, 164 insertions(+), 82 deletions(-)

-- 
2.13.1



[PATCH 1/8] lpfc: Fix ELS abort on SLI-3 adapters

2018-07-31 Thread James Smart
For ABORT_XRI_CN command, firmware identifies XRI to abort by
IOTAG and RPI combination. For ELS aborts, driver specifies
IOTAG correctly but RPI is not specified.

Fix by setting RPI in WQE.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_nportdisc.c |  3 +++
 drivers/scsi/lpfc/lpfc_sli.c   | 16 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c 
b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 1f0a7d7dbc54..843e765db86d 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1062,6 +1062,9 @@ lpfc_rcv_logo_plogi_issue(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp,
 {
struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
 
+   /* Retrieve RPI from LOGO IOCB. RPI is used for CMD_ABORT_XRI_CN */
+   if (vport->phba->sli_rev == LPFC_SLI_REV3)
+   ndlp->nlp_rpi = cmdiocb->iocb.ulpIoTag;
/* software abort outstanding PLOGI */
lpfc_els_abort(vport->phba, ndlp);
 
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 72500dcb13a9..9830bdb6e072 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -10703,6 +10703,12 @@ lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *cmdiocb,
 
spin_lock_irq(>hbalock);
if (phba->sli_rev < LPFC_SLI_REV4) {
+   if (irsp->ulpCommand == CMD_ABORT_XRI_CX &&
+   irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
+   irsp->un.ulpWord[4] == IOERR_ABORT_REQUESTED) {
+   spin_unlock_irq(>hbalock);
+   goto release_iocb;
+   }
if (abort_iotag != 0 &&
abort_iotag <= phba->sli.last_iotag)
abort_iocb =
@@ -10724,6 +10730,7 @@ lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *cmdiocb,
 
spin_unlock_irq(>hbalock);
}
+release_iocb:
lpfc_sli_release_iocbq(phba, cmdiocb);
return;
 }
@@ -10780,6 +10787,7 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, 
struct lpfc_sli_ring *pring,
IOCB_t *iabt = NULL;
int retval;
unsigned long iflags;
+   struct lpfc_nodelist *ndlp;
 
lockdep_assert_held(>hbalock);
 
@@ -10810,9 +10818,13 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, 
struct lpfc_sli_ring *pring,
if (phba->sli_rev == LPFC_SLI_REV4) {
iabt->un.acxri.abortIoTag = cmdiocb->sli4_xritag;
iabt->un.acxri.abortContextTag = cmdiocb->iotag;
-   }
-   else
+   } else {
iabt->un.acxri.abortIoTag = icmd->ulpIoTag;
+   if (pring->ringno == LPFC_ELS_RING) {
+   ndlp = (struct lpfc_nodelist *)(cmdiocb->context1);
+   iabt->un.acxri.abortContextTag = ndlp->nlp_rpi;
+   }
+   }
iabt->ulpLe = 1;
iabt->ulpClass = icmd->ulpClass;
 
-- 
2.13.1



[PATCH 5/8] lpfc: Limit tracking of tgt queue depth in fast path

2018-07-31 Thread James Smart
Performance is affected when target queue depth is tracked.
An atomic counter is incremented on the submission path, which
competes with it being decremented on the completion path.
In addition, multiple CPUs can simultaneously be manipulating
this counter for the same ndlp.

Reduce the overhead by only performing the target increment/decrement
when the target queue depth is less than the overall adapter depth,
thus is actually meaningful.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_disc.h  |  3 ++
 drivers/scsi/lpfc/lpfc_nportdisc.c |  6 
 drivers/scsi/lpfc/lpfc_nvme.c  | 41 -
 drivers/scsi/lpfc/lpfc_nvme.h  |  1 +
 drivers/scsi/lpfc/lpfc_scsi.c  | 74 ++
 drivers/scsi/lpfc/lpfc_scsi.h  |  2 ++
 6 files changed, 74 insertions(+), 53 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h
index 376f02819f20..28e2b60fc5c0 100644
--- a/drivers/scsi/lpfc/lpfc_disc.h
+++ b/drivers/scsi/lpfc/lpfc_disc.h
@@ -150,6 +150,9 @@ struct lpfc_node_rrq {
unsigned long rrq_stop_time;
 };
 
+#define lpfc_ndlp_check_qdepth(phba, ndlp) \
+   (ndlp->cmd_qdepth < phba->sli4_hba.max_cfg_param.max_xri)
+
 /* Defines for nlp_flag (uint32) */
 #define NLP_IGNR_REG_CMPL  0x0001 /* Rcvd rscn before we cmpl reg login */
 #define NLP_REG_LOGIN_SEND 0x0002   /* sent reglogin to adapter */
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c 
b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 843e765db86d..bd9bce9d9974 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -1985,12 +1985,6 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp,
if (bf_get_be32(prli_disc, nvpr))
ndlp->nlp_type |= NLP_NVME_DISCOVERY;
 
-   /* This node is an NVME target.  Adjust the command
-* queue depth on this node to not exceed the available
-* xris.
-*/
-   ndlp->cmd_qdepth = phba->sli4_hba.nvme_xri_max;
-
/*
 * If prli_fba is set, the Target supports FirstBurst.
 * If prli_fb_sz is 0, the FirstBurst size is unlimited,
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 32053424d546..028462e5994d 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1135,9 +1135,6 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct 
lpfc_iocbq *pwqeIn,
else
lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
 
-   if (ndlp && NLP_CHK_NODE_ACT(ndlp))
-   atomic_dec(>cmd_pending);
-
/* Update stats and complete the IO.  There is
 * no need for dma unprep because the nvme_transport
 * owns the dma address.
@@ -1546,17 +1543,19 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port 
*pnvme_lport,
/* The node is shared with FCP IO, make sure the IO pending count does
 * not exceed the programmed depth.
 */
-   if ((atomic_read(>cmd_pending) >= ndlp->cmd_qdepth) &&
-   !expedite) {
-   lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
-"6174 Fail IO, ndlp qdepth exceeded: "
-"idx %d DID %x pend %d qdepth %d\n",
-lpfc_queue_info->index, ndlp->nlp_DID,
-atomic_read(>cmd_pending),
-ndlp->cmd_qdepth);
-   atomic_inc(>xmt_fcp_qdepth);
-   ret = -EBUSY;
-   goto out_fail;
+   if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
+   if ((atomic_read(>cmd_pending) >= ndlp->cmd_qdepth) &&
+   !expedite) {
+   lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
+"6174 Fail IO, ndlp qdepth exceeded: "
+"idx %d DID %x pend %d qdepth %d\n",
+lpfc_queue_info->index, ndlp->nlp_DID,
+atomic_read(>cmd_pending),
+ndlp->cmd_qdepth);
+   atomic_inc(>xmt_fcp_qdepth);
+   ret = -EBUSY;
+   goto out_fail;
+   }
}
 
lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, expedite);
@@ -1614,8 +1613,6 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port 
*pnvme_lport,
goto out_free_nvme_buf;
}
 
-   atomic_inc(>cmd_pending);
-
lpfc_nvmeio_data(phba, "NVME FCP XMIT: xri x%x idx %d to %06x\n",
 lpfc_ncmd->cur_iocbq.sli4_xritag,
 lpfc_queue_info->index, ndlp->nlp_DID);
@@ -1623,7 +1620,6 @@ 

[PATCH 4/8] lpfc: Fix driver crash when re-registering NVME rports.

2018-07-31 Thread James Smart
During remote port loss fault testing, the driver crashed
with the following trace:

general protection fault:  [#1] SMP
RIP: ... lpfc_nvme_register_port+0x250/0x480 [lpfc]
Call Trace:
 lpfc_nlp_state_cleanup+0x1b3/0x7a0 [lpfc]
 lpfc_nlp_set_state+0xa6/0x1d0 [lpfc]
 lpfc_cmpl_prli_prli_issue+0x213/0x440
 lpfc_disc_state_machine+0x7e/0x1e0 [lpfc]
 lpfc_cmpl_els_prli+0x18a/0x200 [lpfc]
 lpfc_sli_sp_handle_rspiocb+0x3b5/0x6f0 [lpfc]
 lpfc_sli_handle_slow_ring_event_s4+0x161/0x240 [lpfc]
 lpfc_work_done+0x948/0x14c0 [lpfc]
 lpfc_do_work+0x16f/0x180 [lpfc]
 kthread+0xc9/0xe0
 ret_from_fork+0x55/0x80

After registering a new remoteport, the driver is pulling an
ndlp pointer from the lpfc rport associated with the private
area of a newly registered remoteport. The private area is
uninitialized, so it's garbage.

Correct by pulling the lpfc rport pointer from the entering
ndlp pointer, then the ndlp value from that rport. Note that the
entering ndlp and the rport->ndlp may differ due to an address
change swap.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_nvme.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 4cc6783b6a9f..32053424d546 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2687,7 +2687,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp)
struct lpfc_nvme_rport *oldrport;
struct nvme_fc_remote_port *remote_port;
struct nvme_fc_port_info rpinfo;
-   struct lpfc_nodelist *prev_ndlp;
+   struct lpfc_nodelist *prev_ndlp = NULL;
 
lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC,
 "6006 Register NVME PORT. DID x%06x nlptype x%x\n",
@@ -2736,23 +2736,29 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp)
spin_unlock_irq(>phba->hbalock);
rport = remote_port->private;
if (oldrport) {
+   /* New remoteport record does not guarantee valid
+* host private memory area.
+*/
+   prev_ndlp = oldrport->ndlp;
if (oldrport == remote_port->private) {
-   /* Same remoteport.  Just reuse. */
+   /* Same remoteport - ndlp should match.
+* Just reuse.
+*/
lpfc_printf_vlog(ndlp->vport, KERN_INFO,
 LOG_NVME_DISC,
 "6014 Rebinding lport to "
 "remoteport %p wwpn 0x%llx, "
-"Data: x%x x%x %p x%x x%06x\n",
+"Data: x%x x%x %p %p x%x 
x%06x\n",
 remote_port,
 remote_port->port_name,
 remote_port->port_id,
 remote_port->port_role,
+prev_ndlp,
 ndlp,
 ndlp->nlp_type,
 ndlp->nlp_DID);
return 0;
}
-   prev_ndlp = rport->ndlp;
 
/* Sever the ndlp<->rport association
 * before dropping the ndlp ref from
@@ -2786,13 +2792,13 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp)
lpfc_printf_vlog(vport, KERN_INFO,
 LOG_NVME_DISC | LOG_NODE,
 "6022 Binding new rport to "
-"lport %p Remoteport %p  WWNN 0x%llx, "
+"lport %p Remoteport %p rport %p WWNN 0x%llx, "
 "Rport WWPN 0x%llx DID "
-"x%06x Role x%x, ndlp %p\n",
-lport, remote_port,
+"x%06x Role x%x, ndlp %p prev_ndlp %p\n",
+lport, remote_port, rport,
 rpinfo.node_name, rpinfo.port_name,
 rpinfo.port_id, rpinfo.port_role,
-ndlp);
+ndlp, prev_ndlp);
} else {
lpfc_printf_vlog(vport, KERN_ERR,
 LOG_NVME_DISC | LOG_NODE,
-- 
2.13.1



[PATCH] scsi: csiostor: remove automatic irq affinity assignment

2018-07-31 Thread Varun Prakash
If the number of interrupt vectors is greater than num_online_cpus(),
pci_alloc_irq_vectors_affinity() assigns a cpumask based on
num_possible_cpus() to the remaining vectors. As a result, interrupts
are not generated for these vectors.

This patch fixes this issue by using pci_alloc_irq_vectors()
instead of pci_alloc_irq_vectors_affinity().

Signed-off-by: Varun Prakash 
---
 drivers/scsi/csiostor/csio_isr.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_isr.c b/drivers/scsi/csiostor/csio_isr.c
index 7c88147..8b92c59 100644
--- a/drivers/scsi/csiostor/csio_isr.c
+++ b/drivers/scsi/csiostor/csio_isr.c
@@ -480,7 +480,6 @@ csio_enable_msix(struct csio_hw *hw)
int i, j, k, n, min, cnt;
int extra = CSIO_EXTRA_VECS;
struct csio_scsi_cpu_info *info;
-   struct irq_affinity desc = { .pre_vectors = 2 };
 
min = hw->num_pports + extra;
cnt = hw->num_sqsets + extra;
@@ -491,8 +490,7 @@ csio_enable_msix(struct csio_hw *hw)
 
csio_dbg(hw, "FW supp #niq:%d, trying %d msix's\n", hw->cfg_niq, cnt);
 
-   cnt = pci_alloc_irq_vectors_affinity(hw->pdev, min, cnt,
-   PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, );
+   cnt = pci_alloc_irq_vectors(hw->pdev, min, cnt, PCI_IRQ_MSIX);
if (cnt < 0)
return cnt;
 
-- 
2.0.2



Re: FCOE vn2vn memory leaks in 4.14

2018-07-31 Thread ard
Hi,

On Tue, Jul 31, 2018 at 10:38:06AM +0200, Johannes Thumshirn wrote:
> So I've fixed one use-after-free and one memory leak, but the one you
> reported is still on the TODO list.

Wow, thanks...

> Long story short, I can reproduce it here and I'm working on it.
> 
> Thanks for your patience,
Thank you for being so pro-active, seriously. I wanted to look at
it some more, but heatwave, no airco and a festival in between
:-(.

Anyway, I got a PC and an odroid Xu4 (ARM) for testing now.

Regards,
Ard van Breemen
-- 
.signature not found


Re: FCOE vn2vn memory leaks in 4.14

2018-07-31 Thread Johannes Thumshirn
On Fri, Jul 27, 2018 at 12:49:55AM +0200, ard wrote:
> Actually already got there from my arm dump, but they are different in 
> backtrace.
> Anyway:
> root@antec:~# grep -c fc_rport_create kmemleak.txt
> 44
> So 44 * 512 bytes leaked in that path. And an extra thing: "it was leaked in" 
> libfc and not libfcoe.
> Or just like the bug report we were leaking fc_rport_priv.
> But one thing I don't understand (yet) is why the fc_rport_create happens 
> while
> we already have a port.
> 
> Anyway, I will continue bug hunting. It's night, and the temperature has 
> dropped to 29.8 .

So I've fixed one use-after-free and one memory leak, but the one you
reported is still on the TODO list.

Long story short, I can reproduce it here and I'm working on it.

Thanks for your patience,
   Johannes
-- 
Johannes Thumshirn  Storage
jthumsh...@suse.de+49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850