The driver allocates an sg list per io structure based on a fixed maximum size. When it registers with the protocol transports and indicates the max sg list size it supports, the driver manipulates the fixed value to report a lesser amount so that it has reserved space for sg elements that are used for DIF.
The driver initialization path sets the cfg_sg_seg_cnt field to the manipulated value for scsi. NVME initialization ran afterward and capped its maximum by the manipulated value for SCSI. This erroneously made NVME report the SCSI reduced-for-DIF value, which reduced the max io size for nvme and wasted sg elements. Rework the driver so that cfg_sg_seg_cnt becomes the overall maximum size and allow the max size to be tunable. A separate (new) scsi sg count is then set up with the scsi-modified reduced value. NVME then initializes based off the overall maximum. Signed-off-by: Dick Kennedy <dick.kenn...@broadcom.com> Signed-off-by: James Smart <james.sm...@broadcom.com> --- drivers/scsi/lpfc/lpfc.h | 3 +- drivers/scsi/lpfc/lpfc_attr.c | 69 +++++++++++++++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_init.c | 42 ++++++++++++++++++------- drivers/scsi/lpfc/lpfc_nvme.c | 13 ++------ drivers/scsi/lpfc/lpfc_nvmet.c | 13 ++------ 5 files changed, 103 insertions(+), 37 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 43732e8d1347..322b24f826a4 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -52,7 +52,7 @@ struct lpfc_sli2_slim; downloads using bsg */ #define LPFC_MIN_SG_SLI4_BUF_SZ 0x800 /* based on LPFC_DEFAULT_SG_SEG_CNT */ -#define LPFC_MAX_SG_SLI4_SEG_CNT_DIF 128 /* sg element count per scsi cmnd */ +#define LPFC_MAX_BG_SLI4_SEG_CNT_DIF 128 /* sg element count for BlockGuard */ #define LPFC_MAX_SG_SEG_CNT_DIF 512 /* sg element count per scsi cmnd */ #define LPFC_MAX_SG_SEG_CNT 4096 /* sg element count per scsi cmnd */ #define LPFC_MIN_SG_SEG_CNT 32 /* sg element count per scsi cmnd */ @@ -790,6 +790,7 @@ struct lpfc_hba { uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; uint32_t cfg_nvme_seg_cnt; + uint32_t cfg_scsi_seg_cnt; uint32_t cfg_sg_dma_buf_size; uint64_t cfg_soft_wwnn; uint64_t cfg_soft_wwpn; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 057a60abe664..7d81f44630ee 
100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5353,15 +5353,74 @@ LPFC_ATTR(delay_discovery, 0, 0, 1, /* * lpfc_sg_seg_cnt - Initial Maximum DMA Segment Count - * This value can be set to values between 64 and 4096. The default value is - * 64, but may be increased to allow for larger Max I/O sizes. The scsi layer - * will be allowed to request I/Os of sizes up to (MAX_SEG_COUNT * SEG_SIZE). + * This value can be set to values between 64 and 4096. The default value + * is 64, but may be increased to allow for larger Max I/O sizes. The scsi + * and nvme layers will allow I/O sizes up to (MAX_SEG_COUNT * SEG_SIZE). * Because of the additional overhead involved in setting up T10-DIF, * this parameter will be limited to 128 if BlockGuard is enabled under SLI4 * and will be limited to 512 if BlockGuard is enabled under SLI3. */ -LPFC_ATTR_R(sg_seg_cnt, LPFC_DEFAULT_SG_SEG_CNT, LPFC_MIN_SG_SEG_CNT, - LPFC_MAX_SG_SEG_CNT, "Max Scatter Gather Segment Count"); +static uint lpfc_sg_seg_cnt = LPFC_DEFAULT_SG_SEG_CNT; +module_param(lpfc_sg_seg_cnt, uint, 0444); +MODULE_PARM_DESC(lpfc_sg_seg_cnt, "Max Scatter Gather Segment Count"); + +/** + * lpfc_sg_seg_cnt_show - Display the scatter/gather list sizes + * configured for the adapter + * @dev: class converted to a Scsi_host structure. + * @attr: device attribute, not used. + * @buf: on return contains a string with the list sizes + * + * Returns: size of formatted string. 
+ **/ +static ssize_t +lpfc_sg_seg_cnt_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; + struct lpfc_hba *phba = vport->phba; + int len; + + len = snprintf(buf, PAGE_SIZE, "SGL sz: %d total SGEs: %d\n", + phba->cfg_sg_dma_buf_size, phba->cfg_total_seg_cnt); + + len += snprintf(buf + len, PAGE_SIZE, "Cfg: %d SCSI: %d NVME: %d\n", + phba->cfg_sg_seg_cnt, phba->cfg_scsi_seg_cnt, + phba->cfg_nvme_seg_cnt); + return len; +} + +static DEVICE_ATTR_RO(lpfc_sg_seg_cnt); + +/** + * lpfc_sg_seg_cnt_init - Set the hba sg_seg_cnt initial value + * @phba: lpfc_hba pointer. + * @val: contains the initial value + * + * Description: + * Validates the initial value is within range and assigns it to the + * adapter. If not in range, an error message is posted and the + * default value is assigned. + * + * Returns: + * zero if value is in range and is set + * -EINVAL if value was out of range + **/ +static int +lpfc_sg_seg_cnt_init(struct lpfc_hba *phba, int val) +{ + if (val >= LPFC_MIN_SG_SEG_CNT && val <= LPFC_MAX_SG_SEG_CNT) { + phba->cfg_sg_seg_cnt = val; + return 0; + } + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0409 "LPFC_DRIVER_NAME"_sg_seg_cnt attribute cannot " + "be set to %d, allowed range is [%d, %d]\n", + val, LPFC_MIN_SG_SEG_CNT, LPFC_MAX_SG_SEG_CNT); + phba->cfg_sg_seg_cnt = LPFC_DEFAULT_SG_SEG_CNT; + return -EINVAL; +} /* * lpfc_enable_mds_diags: Enable MDS Diagnostics diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f3cae733ae2d..90fb83f88179 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3956,7 +3956,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) if (phba->sli_rev == LPFC_SLI_REV4) { shost->dma_boundary = phba->sli4_hba.pc_sli4_params.sge_supp_len-1; - shost->sg_tablesize = phba->cfg_sg_seg_cnt; + shost->sg_tablesize = 
phba->cfg_scsi_seg_cnt; } /* @@ -5919,8 +5919,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp */ max_buf_size = (2 * SLI4_PAGE_SIZE); - if (phba->cfg_sg_seg_cnt > LPFC_MAX_SGL_SEG_CNT - extra) - phba->cfg_sg_seg_cnt = LPFC_MAX_SGL_SEG_CNT - extra; /* * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size @@ -5942,9 +5940,16 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */ phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT; - if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SLI4_SEG_CNT_DIF) - phba->cfg_sg_seg_cnt = - LPFC_MAX_SG_SLI4_SEG_CNT_DIF; + /* + * If supporting DIF, reduce the seg count for scsi to + * allow room for the DIF sges. + */ + if (phba->cfg_enable_bg && + phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF) + phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF; + else + phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt; + } else { /* * The scsi_buf for a regular I/O holds the FCP cmnd, @@ -5958,6 +5963,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Total SGEs for scsi_sg_list */ phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra; + phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt; /* * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only @@ -5965,10 +5971,22 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) */ } + /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */ + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, + "6300 Reducing NVME sg segment " + "cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + /* Initialize the host templates with the updated values. 
*/ - lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; - lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; - lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt; + lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt; if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ) phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ; @@ -5977,9 +5995,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size); lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP, - "9087 sg_tablesize:%d dmabuf_size:%d total_sge:%d\n", + "9087 sg_seg_cnt:%d dmabuf_size:%d " + "total:%d scsi:%d nvme:%d\n", phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size, - phba->cfg_total_seg_cnt); + phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt, + phba->cfg_nvme_seg_cnt); /* Initialize buffer queue management fields */ INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 028462e5994d..a84299c36c9f 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2462,17 +2462,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn); - /* Limit to LPFC_MAX_NVME_SEG_CNT. - * For now need + 1 to get around NVME transport logic. + /* We need to tell the transport layer + 1 because it takes page + * alignment into account. 
When space for the SGL is allocated we + * allocate + 3, one for cmd, one for rsp and one for this alignment */ - if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { - lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_INIT, - "6300 Reducing sg segment cnt to %d\n", - LPFC_MAX_NVME_SEG_CNT); - phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; - } else { - phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; - } lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index b766afe10d3d..4926ca6f0b8c 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1373,17 +1373,10 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); pinfo.port_id = vport->fc_myDID; - /* Limit to LPFC_MAX_NVME_SEG_CNT. - * For now need + 1 to get around NVME transport logic. + /* We need to tell the transport layer + 1 because it takes page + * alignment into account. When space for the SGL is allocated we + * allocate + 3, one for cmd, one for rsp and one for this alignment */ - if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { - lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, - "6400 Reducing sg segment cnt to %d\n", - LPFC_MAX_NVME_SEG_CNT); - phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; - } else { - phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; - } lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP; -- 2.13.1