Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 7/28/2013 11:15 AM, Or Gerlitz wrote: On 26/07/2013 20:15, Vu Pham wrote: Hello Or/Sagi, Just a minor /** + * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * for fast registration work requests. + * returns 0 on success, or errno code on failure + */ +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +{ +struct iser_device*device = ib_conn->device; +struct fast_reg_descriptor*desc; +int i, ret; + + INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); +ib_conn->fastreg.frwr.pool_size = 0; +for (i = 0; i < cmds_max; i++) { +desc = kmalloc(sizeof(*desc), GFP_KERNEL); +if (!desc) { +iser_err("Failed to allocate a new fast_reg descriptor\n"); +ret = -ENOMEM; +goto err; +} + +desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); +if (IS_ERR(desc->data_frpl)) { ret = PTR_ERR(desc->data_frpl); +iser_err("Failed to allocate ib_fast_reg_page_list err=%ld\n", + PTR_ERR(desc->data_frpl)); using ret +goto err; +} + +desc->data_mr = ib_alloc_fast_reg_mr(device->pd, + ISCSI_ISER_SG_TABLESIZE + 1); +if (IS_ERR(desc->data_mr)) { ret = PTR_ERR(desc->data_mr); +iser_err("Failed to allocate ib_fast_reg_mr err=%ld\n", + PTR_ERR(desc->data_mr)); using ret + ib_free_fast_reg_page_list(desc->data_frpl); +goto err; +} +desc->valid = true; +list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); +ib_conn->fastreg.frwr.pool_size++; +} + +return 0; +err: +iser_free_frwr_pool(ib_conn); +return ret; +} Nice catch! I see that Roland hasn't yet picked this series so I will re-submit it with fixes to the issues you have found here. Or. Nice catch indeed, thanks Vu. -Sagi -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 26/07/2013 20:15, Vu Pham wrote: Hello Or/Sagi, Just a minor /** + * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * for fast registration work requests. + * returns 0 on success, or errno code on failure + */ +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +{ +struct iser_device*device = ib_conn->device; +struct fast_reg_descriptor*desc; +int i, ret; + + INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); +ib_conn->fastreg.frwr.pool_size = 0; +for (i = 0; i < cmds_max; i++) { +desc = kmalloc(sizeof(*desc), GFP_KERNEL); +if (!desc) { +iser_err("Failed to allocate a new fast_reg descriptor\n"); +ret = -ENOMEM; +goto err; +} + +desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); +if (IS_ERR(desc->data_frpl)) { ret = PTR_ERR(desc->data_frpl); +iser_err("Failed to allocate ib_fast_reg_page_list err=%ld\n", + PTR_ERR(desc->data_frpl)); using ret +goto err; +} + +desc->data_mr = ib_alloc_fast_reg_mr(device->pd, + ISCSI_ISER_SG_TABLESIZE + 1); +if (IS_ERR(desc->data_mr)) { ret = PTR_ERR(desc->data_mr); +iser_err("Failed to allocate ib_fast_reg_mr err=%ld\n", + PTR_ERR(desc->data_mr)); using ret + ib_free_fast_reg_page_list(desc->data_frpl); +goto err; +} +desc->valid = true; +list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); +ib_conn->fastreg.frwr.pool_size++; +} + +return 0; +err: +iser_free_frwr_pool(ib_conn); +return ret; +} Nice catch! I see that Roland hasn't yet picked this series so I will re-submit it with fixes to the issues you have found here. Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
Hello Or/Sagi, Just a minor /** + * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * for fast registration work requests. + * returns 0 on success, or errno code on failure + */ +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +{ + struct iser_device *device = ib_conn->device; + struct fast_reg_descriptor *desc; + int i, ret; + + INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); + ib_conn->fastreg.frwr.pool_size = 0; + for (i = 0; i < cmds_max; i++) { + desc = kmalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + iser_err("Failed to allocate a new fast_reg descriptor\n"); + ret = -ENOMEM; + goto err; + } + + desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_frpl)) { ret = PTR_ERR(desc->data_frpl); + iser_err("Failed to allocate ib_fast_reg_page_list err=%ld\n", +PTR_ERR(desc->data_frpl)); using ret + goto err; + } + + desc->data_mr = ib_alloc_fast_reg_mr(device->pd, +ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_mr)) { ret = PTR_ERR(desc->data_mr); + iser_err("Failed to allocate ib_fast_reg_mr err=%ld\n", +PTR_ERR(desc->data_mr)); using ret + ib_free_fast_reg_page_list(desc->data_frpl); + goto err; + } + desc->valid = true; + list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); + ib_conn->fastreg.frwr.pool_size++; + } + + return 0; +err: + iser_free_frwr_pool(ib_conn); + return ret; +} -vu -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 07/24/13 18:28, Or Gerlitz wrote: On 23/07/2013 17:47, Bart Van Assche wrote: Still regarding page sizes: shouldn't ib_alloc_fast_reg_page_list() and ib_alloc_fast_reg_mr() multiply the SG list length by PAGE_SIZE / SIZE_4K to compensate for page size differences on architectures where virtual memory pages are larger than 4KB ? My understanding of the API is that it assumes that the HW is capable to change the page shift (size) associated with certain fast_reg MR for each mapping. I think you have a point here. Although I'm not sure this is what you had in mind, using PAGE_SIZE / PAGE_SHIFT instead of SIZE_4K / SHIFT_4K should work better on architectures with a page size larger than 4KB. On mlx4 the largest supported page list length is 511 (MLX4_MAX_FAST_REG_PAGES). So the former option should allow to support larger sg lists on such architectures (up to length 511) compared to the latter (up to length 511/(64/4) = 31 for 64KB pages). Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 23/07/2013 17:47, Bart Van Assche wrote: Sorry but I had overlooked the bounce buffer patch. Regarding page sizes: is an InfiniBand HCA required to support a page size of 512 bytes ? To me it seems like the smallest page size supported by e.g. the ocrdma driver is 4KB. From ocrdma_query_device(): attr->page_size_cap = 0x000; I don't think supporting "block lists" that is pages size of 512B for memory registration is mandated by the IB spec, even though ConnectX2/3 HCAs do support it, #ibv_devinfo -v | grep page_size_cap page_size_cap: 0xfe00 # ibv_devinfo -v | grep _id hca_id: mlx4_0 vendor_id: 0x02c9 vendor_part_id: 26428 board_id: MT_0D81120009 Still regarding page sizes: shouldn't ib_alloc_fast_reg_page_list() and ib_alloc_fast_reg_mr() multiply the SG list length by PAGE_SIZE / SIZE_4K to compensate for page size differences on architectures where virtual memory pages are larger than 4KB ? My understanding of the API is that it assumes that the HW is capable to change the page shift (size) associated with certain fast_reg MR for each mapping. This is indeed a bit different from the kernel FMR API where the page shift is provided when the FMR is created. For mlx4, the relevant field is struct mlx4_mpt_entry->entity_size which is written with the page_shift provided by the ULP for FMRs in mlx4_mr_enable and with 0 for fast_reg mrs. Later when fast reg WR is posted to the HW, struct mlx4_wqe_fmr_seg->page_size is written with the page shift provided by the ULP and the HW treats it the same as it was that entity_size field... Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 07/23/13 16:21, Or Gerlitz wrote: Bart, iSER's FMR and FRWR code works under the assumption that an SG list is "4K aligned". For SGs which don't obey that assumption we're using bounce buffer. Note that the SG "page" size used by FMRs/FRWRs doesn't have to be 1:1 with the OS page size, so in that respect && down the road, we will get rid of the bounce buffer thing with having another FMR/FRWR pool whose "page" size is 512B and will be used for SG which are not "4K aligned". Sorry but I had overlooked the bounce buffer patch. Regarding page sizes: is an InfiniBand HCA required to support a page size of 512 bytes ? To me it seems like the smallest page size supported by e.g. the ocrdma driver is 4KB. From ocrdma_query_device(): attr->page_size_cap = 0x000; Still regarding page sizes: shouldn't ib_alloc_fast_reg_page_list() and ib_alloc_fast_reg_mr() multiply the SG list length by PAGE_SIZE / SIZE_4K to compensate for page size differences on architectures where virtual memory pages are larger than 4KB ? Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 7/23/2013 2:58 PM, Bart Van Assche wrote: On 07/22/13 15:11, Sagi Grimberg wrote: So just to clarify the flow: . at connection establishment allocate pool of fastreg descriptors . upon each IOP take a fastreg descriptor from the pool . if it is not invalidated - invalidate it. . register using FRWR. . when cleanup_task is called - just return the fastreg descriptor to the pool. . at connection teardown free all resources. Still to come: . upon each IOP response, check if the target used remote invalidate - if so mark relevant fastreg as valid. Hello Sagi and Or, Thanks for the clarifications. I have one more question though. My interpretation of section "10.6 Memory Management" in the IB specification is that memory registration maps a memory region that either has contiguous virtual addresses or contiguous physical addresses. However, there is no such requirement for an sg-list. As an example, for direct I/O to a block device with a sector size of 512 bytes it is only required that I/O occurs in multiples of 512 bytes and from memory aligned on 512-byte boundaries. So the use of direct I/O can result in an sg-list where the second and subsequent sg-list elements have a non-zero offset. Do you agree with this ? Are such sg-lists mapped correctly by the FRWR code ? Bart. Hey Bart, You are on the money with this observation, like FMRs, FRWR cannot register any arbitrary SG-list. You have the same limitations. Unlike SRP where the initiator will use multiple FMRs to register such "unaligned" SG-lists, iSER uses a bounce buffer to copy the data to a nice physically contiguous memory area (see patch 5/7 fall_to_bounce_buf routine), thus will pass a single R_Key for each transaction. An equivalent FRWR implementation for SRP will also use multiple FRWRs in-order to register such "un-aligned" SG-lists and publish the R_Keys in ib_sge. Hope this helps, -Sagi -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On Tue, Jul 23, 2013 at 2:58 PM, Bart Van Assche wrote: > > [...] > Hello Sagi and Or, > > Thanks for the clarifications. I have one more question though. My > interpretation of section "10.6 Memory Management" in the IB specification is > that memory registration maps a memory region that either has contiguous > virtual addresses or contiguous physical addresses. However, there is no such > requirement for an sg-list. As an example, for direct I/O to a block device > with a sector size of 512 bytes it is only required that I/O occurs in > multiples of 512 bytes and from memory aligned on 512-byte boundaries. So the > use of direct I/O can result in an sg-list where the second and subsequent > sg-list elements have a non-zero offset. Do you agree with this ? YES, this can happen. > > Are such sg-lists mapped correctly by the FRWR code ? Bart, iSER's FMR and FRWR code works under the assumption that an SG list is "4K aligned". For SGs which don't obey that assumption we're using bounce buffer. Note that the SG "page" size used by FMRs/FRWRs doesn't have to be 1:1 with the OS page size, so in that respect && down the road, we will get rid of the bounce buffer thing with having another FMR/FRWR pool whose "page" size is 512B and will be used for SG which are not "4K aligned". Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 07/22/13 15:11, Sagi Grimberg wrote: So just to clarify the flow: . at connection establishment allocate pool of fastreg descriptors . upon each IOP take a fastreg descriptor from the pool . if it is not invalidated - invalidate it. . register using FRWR. . when cleanup_task is called - just return the fastreg descriptor to the pool. . at connection teardown free all resources. Still to come: . upon each IOP response, check if the target used remote invalidate - if so mark relevant fastreg as valid. Hello Sagi and Or, Thanks for the clarifications. I have one more question though. My interpretation of section "10.6 Memory Management" in the IB specification is that memory registration maps a memory region that either has contiguous virtual addresses or contiguous physical addresses. However, there is no such requirement for an sg-list. As an example, for direct I/O to a block device with a sector size of 512 bytes it is only required that I/O occurs in multiples of 512 bytes and from memory aligned on 512-byte boundaries. So the use of direct I/O can result in an sg-list where the second and subsequent sg-list elements have a non-zero offset. Do you agree with this ? Are such sg-lists mapped correctly by the FRWR code ? Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On Mon, Jul 22, 2013 at 2:46 PM, Bart Van Assche wrote: [...] > What will happen if a new FRWR is submitted with an rkey that is still valid ? The HW will fail this WR, we must invalidate the mapping before re-using the MR. Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 7/22/2013 2:46 PM, Bart Van Assche wrote: On 07/18/13 15:25, Or Gerlitz wrote: +static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, +struct iser_conn *ib_conn, +struct iser_regd_buf *regd_buf, +u32 offset, unsigned int data_size, +unsigned int page_list_len) +{ +struct ib_send_wr fastreg_wr, inv_wr; +struct ib_send_wr *bad_wr, *wr = NULL; +u8 key; +int ret; + +if (!desc->valid) { +memset(&inv_wr, 0, sizeof(inv_wr)); +inv_wr.opcode = IB_WR_LOCAL_INV; +inv_wr.send_flags = IB_SEND_SIGNALED; +inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; +wr = &inv_wr; +/* Bump the key */ +key = (u8)(desc->data_mr->rkey & 0x00FF); +ib_update_fast_reg_key(desc->data_mr, ++key); +} + +/* Prepare FASTREG WR */ +memset(&fastreg_wr, 0, sizeof(fastreg_wr)); +fastreg_wr.opcode = IB_WR_FAST_REG_MR; +fastreg_wr.send_flags = IB_SEND_SIGNALED; +fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; +fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; +fastreg_wr.wr.fast_reg.page_list_len = page_list_len; +fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; +fastreg_wr.wr.fast_reg.length = data_size; +fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; +fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); Hello Sagi, If I interpret the above code correctly the rkey used in the previous FRWR is invalidated as soon as a new FRWR is queued. Does this mean that the iSER initiator limits queue depth to one ? Another question: is it on purpose that iscsi_iser_cleanup_task() does not invalidate an rkey if a command has been aborted successfully ? A conforming iSER target does not send a response for aborted commands. Will successful command abortion result in the rkey not being invalidated ? What will happen if a new FRWR is submitted with an rkey that is still valid ? Thanks, Bart. Hey Bart, You interpret correctly, iSER will local invalidate the rkey just before re-using it (conditioned that it is not previously invalidated - remotely by the target). This code is still missing the remote invalidate part, then iSER initiator will publish its remote invalidate support and in case the target may remote invalidate (seen in the RSP completion) the rkey and the Initiator will pick it up in the RSP completion and mark the associated MR as valid (valid for use again). Not sure what you meant in your question, but this does not mean that iSER initiator limits the queue depth to 1, initiator manages a pool of fastreg descriptors of size == max queued commands (per connection) each containing an ib_mr, For each concurrent IOP it takes a fastreg descriptor from the pool, and uses it for registration (if marked as not valid - will local invalidate the rkey and then use it for registration). When cleanup_task -> iser_task_rdma_finalize -> iser_unreg_rdma_mem is called, it just returns the fastreg to the pool (without local invalidate - as it is done when it will be reused). The reason I chose to do that is that if I locally invalidate the rkey upon task cleanup then Only after the completion I'm allowed to return it back to the pool (only then I know it's ready for reuse) and assuming that I still want to evacuate the task and not wait in my fast-path, I may end-up in certain conditions in a situation that I have no resources to handle the next IOP since all MRs are waiting for LOCAL_INV completions. A possible solution here was to heuristically use a larger pool - but I wanted to avoid that... So just to clarify the flow: . at connection establishment allocate pool of fastreg descriptors . upon each IOP take a fastreg descriptor from the pool . if it is not invalidated - invalidate it. . register using FRWR. . when cleanup_task is called - just return the fastreg descriptor to the pool. . at connection teardown free all resources. Still to come: . upon each IOP response, check if the target used remote invalidate - if so mark relevant fastreg as valid. Hope this helps. -Sagi -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
On 07/18/13 15:25, Or Gerlitz wrote: +static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, + struct iser_conn *ib_conn, + struct iser_regd_buf *regd_buf, + u32 offset, unsigned int data_size, + unsigned int page_list_len) +{ + struct ib_send_wr fastreg_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + u8 key; + int ret; + + if (!desc->valid) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(desc->data_mr->rkey & 0x00FF); + ib_update_fast_reg_key(desc->data_mr, ++key); + } + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; + fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; + fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; + fastreg_wr.wr.fast_reg.length = data_size; + fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; + fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); Hello Sagi, If I interpret the above code correctly the rkey used in the previous FRWR is invalidated as soon as a new FRWR is queued. Does this mean that the iSER initiator limits queue depth to one ? Another question: is it on purpose that iscsi_iser_cleanup_task() does not invalidate an rkey if a command has been aborted successfully ? A conforming iSER target does not send a response for aborted commands. Will successful command abortion result in the rkey not being invalidated ? What will happen if a new FRWR is submitted with an rkey that is still valid ? Thanks, Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
From: Sagi Grimberg Newer HCAs and Virtual functions may not support FMRs but rather a fast registration model, which we call FRWR - "Fast Registration Work Requests". This model was introduced in 00f7ec36c "RDMA/core: Add memory management extensions support" and works when the IB device supports the IB_DEVICE_MEM_MGT_EXTENSIONS capability. Upon creating the iser device iser will test wether the HCA supports FMRs. if no support for FMRs, check if IB_DEVICE_MEM_MGT_EXTENSIONS is supported and assign function handles that handle fast registration and allocation of appropriate resources (fast_reg descriptors). Registration is done using posting IB_WR_FAST_REG_MR to the QP and invalidations using posting IB_WR_LOCAL_INV. Signed-off-by: Sagi Grimberg Signed-off-by: Or Gerlitz --- drivers/infiniband/ulp/iser/iscsi_iser.h | 21 - drivers/infiniband/ulp/iser/iser_memory.c | 140 - drivers/infiniband/ulp/iser/iser_verbs.c | 138 ++-- 3 files changed, 287 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 75c5352..6791402 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -211,7 +211,7 @@ struct iser_mem_reg { u64 va; u64 len; void *mem_h; - int is_fmr; + int is_mr; }; struct iser_regd_buf { @@ -277,6 +277,15 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +struct fast_reg_descriptor { + struct list_head list; + /* For fast registration - FRWR */ + struct ib_mr *data_mr; + struct ib_fast_reg_page_list *data_frpl; + /* Valid for fast registration flag */ + bool valid; +}; + struct iser_conn { struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ struct iscsi_endpoint*ep; @@ -307,6 +316,10 @@ struct iser_conn { struct iser_page_vec*page_vec; /* represents SG to fmr maps* * maps serialized as tx is*/ } fmr; + struct { + struct list_headpool; + int pool_size; + } frwr; } fastreg; }; @@ -393,6 +406,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -405,6 +420,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); +void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, +enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -421,4 +438,6 @@ int iser_initialize_task_headers(struct iscsi_task *task, int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); +int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_frwr_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 1985e90..1ce0c97 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -395,8 +395,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents || - (!ib_conn->fastreg.fmr.pool && mem->dma_nents > 1)) { + if (aligned_len != mem->dma_nents) { err = fall_to_bounce_buf(iser_task, ibdev, cmd_dir, aligned_len); if (err) { @@ -414,7 +413,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, regd_buf->reg.rkey = device->mr->rkey; regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); - regd_buf->reg.is_fmr = 0; + regd_buf->reg.is_mr = 0; iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X " "va: 0x%08lX sz: %ld]\n", @@ -444,3 +443,138 @@ int i