Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-28 Thread Sagi Grimberg

On 7/28/2013 11:15 AM, Or Gerlitz wrote:

On 26/07/2013 20:15, Vu Pham wrote:

Hello Or/Sagi,

Just a minor

 /**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned 
cmds_max)

+{
+struct iser_device*device = ib_conn->device;
+struct fast_reg_descriptor*desc;
+int i, ret;
+
+ INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+ib_conn->fastreg.frwr.pool_size = 0;
+for (i = 0; i < cmds_max; i++) {
+desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+if (!desc) {
+iser_err("Failed to allocate a new fast_reg 
descriptor\n");

+ret = -ENOMEM;
+goto err;
+}
+
+desc->data_frpl = 
ib_alloc_fast_reg_page_list(device->ib_device,

+ ISCSI_ISER_SG_TABLESIZE + 1);
+if (IS_ERR(desc->data_frpl)) {

ret = PTR_ERR(desc->data_frpl);
+iser_err("Failed to allocate ib_fast_reg_page_list 
err=%ld\n",

+ PTR_ERR(desc->data_frpl));

using ret

+goto err;
+}
+
+desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+if (IS_ERR(desc->data_mr)) {

ret = PTR_ERR(desc->data_mr);

+iser_err("Failed to allocate ib_fast_reg_mr err=%ld\n",
+ PTR_ERR(desc->data_mr));

using ret

+ ib_free_fast_reg_page_list(desc->data_frpl);
+goto err;
+}
+desc->valid = true;
+list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ib_conn->fastreg.frwr.pool_size++;
+}
+
+return 0;
+err:
+iser_free_frwr_pool(ib_conn);
+return ret;
+}





Nice catch!

I see that Roland hasn't yet picked this series so I will re-submit it 
with fixes to the issues you have found here.


Or.



Nice catch indeed, thanks Vu.

-Sagi
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-28 Thread Or Gerlitz

On 26/07/2013 20:15, Vu Pham wrote:

Hello Or/Sagi,

Just a minor

 /**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+{
+struct iser_device*device = ib_conn->device;
+struct fast_reg_descriptor*desc;
+int i, ret;
+
+ INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+ib_conn->fastreg.frwr.pool_size = 0;
+for (i = 0; i < cmds_max; i++) {
+desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+if (!desc) {
+iser_err("Failed to allocate a new fast_reg descriptor\n");
+ret = -ENOMEM;
+goto err;
+}
+
+desc->data_frpl = 
ib_alloc_fast_reg_page_list(device->ib_device,

+ ISCSI_ISER_SG_TABLESIZE + 1);
+if (IS_ERR(desc->data_frpl)) {

ret = PTR_ERR(desc->data_frpl);
+iser_err("Failed to allocate ib_fast_reg_page_list 
err=%ld\n",

+ PTR_ERR(desc->data_frpl));

using ret

+goto err;
+}
+
+desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+if (IS_ERR(desc->data_mr)) {

ret = PTR_ERR(desc->data_mr);

+iser_err("Failed to allocate ib_fast_reg_mr err=%ld\n",
+ PTR_ERR(desc->data_mr));

using ret

+ ib_free_fast_reg_page_list(desc->data_frpl);
+goto err;
+}
+desc->valid = true;
+list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ib_conn->fastreg.frwr.pool_size++;
+}
+
+return 0;
+err:
+iser_free_frwr_pool(ib_conn);
+return ret;
+}





Nice catch!

I see that Roland hasn't yet picked this series so I will re-submit it 
with fixes to the issues you have found here.


Or.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-26 Thread Vu Pham

Hello Or/Sagi,

Just a minor

 /**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+{
+   struct iser_device  *device = ib_conn->device;
+   struct fast_reg_descriptor  *desc;
+   int i, ret;
+
+   INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+   ib_conn->fastreg.frwr.pool_size = 0;
+   for (i = 0; i < cmds_max; i++) {
+   desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+   if (!desc) {
+   iser_err("Failed to allocate a new fast_reg 
descriptor\n");
+   ret = -ENOMEM;
+   goto err;
+   }
+
+   desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
+
ISCSI_ISER_SG_TABLESIZE + 1);
+   if (IS_ERR(desc->data_frpl)) {
  

ret = PTR_ERR(desc->data_frpl);

+   iser_err("Failed to allocate ib_fast_reg_page_list 
err=%ld\n",
+PTR_ERR(desc->data_frpl));
  

using ret

+   goto err;
+   }
+
+   desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+ISCSI_ISER_SG_TABLESIZE + 
1);
+   if (IS_ERR(desc->data_mr)) {
  

ret = PTR_ERR(desc->data_mr);

+   iser_err("Failed to allocate ib_fast_reg_mr err=%ld\n",
+PTR_ERR(desc->data_mr));
  

using ret

+   ib_free_fast_reg_page_list(desc->data_frpl);
+   goto err;
+   }
+   desc->valid = true;
+   list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+   ib_conn->fastreg.frwr.pool_size++;
+   }
+
+   return 0;
+err:
+   iser_free_frwr_pool(ib_conn);
+   return ret;
+}
  


-vu
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-26 Thread Bart Van Assche

On 07/24/13 18:28, Or Gerlitz wrote:

On 23/07/2013 17:47, Bart Van Assche wrote:

Still regarding page sizes: shouldn't ib_alloc_fast_reg_page_list()
and ib_alloc_fast_reg_mr() multiply the SG list length by PAGE_SIZE /
SIZE_4K to compensate for page size differences on architectures where
virtual memory pages are larger than 4KB ?


My understanding of the API is that it assumes that the HW is capable to
change the page shift (size) associated with certain fast_reg MR for
each mapping.


I think you have a point here. Although I'm not sure this is what you 
had in mind, using PAGE_SIZE / PAGE_SHIFT instead of SIZE_4K / SHIFT_4K 
should work better on architectures with a page size larger than 4KB. On 
mlx4 the largest supported page list length is 511 
(MLX4_MAX_FAST_REG_PAGES). So the former option should allow to support 
larger sg lists on such architectures (up to length 511) compared to the 
latter (up to length 511/(64/4) = 31 for 64KB pages).


Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-24 Thread Or Gerlitz

On 23/07/2013 17:47, Bart Van Assche wrote:


Sorry but I had overlooked the bounce buffer patch. Regarding page 
sizes: is an InfiniBand HCA required to support a page size of 512 
bytes ? To me it seems like the smallest page size supported by e.g. 
the ocrdma driver is 4KB. From ocrdma_query_device():


attr->page_size_cap = 0x000;


I don't think supporting "block lists" that is pages size of 512B for 
memory registration is mandated by the IB spec, even though ConnectX2/3 
HCAs do support it,


#ibv_devinfo -v | grep page_size_cap
page_size_cap:  0xfe00

# ibv_devinfo -v | grep _id
hca_id: mlx4_0
vendor_id:  0x02c9
vendor_part_id: 26428
board_id:   MT_0D81120009




Still regarding page sizes: shouldn't ib_alloc_fast_reg_page_list() 
and ib_alloc_fast_reg_mr() multiply the SG list length by PAGE_SIZE / 
SIZE_4K to compensate for page size differences on architectures where 
virtual memory pages are larger than 4KB ?


My understanding of the API is that it assumes that the HW is capable to 
change the page shift (size) associated with certain fast_reg MR for 
each mapping.


This is indeed a bit different from the kernel FMR API where the page 
shift is provided when the FMR is created.


For mlx4, the relevant field is struct mlx4_mpt_entry->entity_size which 
is written with the page_shift provided by the ULP
for FMRs in mlx4_mr_enable and with 0 for fast_reg mrs. Later when fast 
reg WR is posted to the HW,
struct mlx4_wqe_fmr_seg->page_size is written with the page shift 
provided by the ULP and the HW treats it the same as it

was that entity_size field...

Or.




--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-23 Thread Bart Van Assche

On 07/23/13 16:21, Or Gerlitz wrote:

Bart, iSER's FMR and FRWR code works under the assumption that an SG
list is "4K aligned". For SGs which don't obey that assumption we're
using bounce buffer.

Note that the SG "page" size used by FMRs/FRWRs doesn't have to be 1:1
with the OS page size, so in that respect && down the road, we will
get rid of the bounce buffer thing with having another FMR/FRWR pool
whose "page" size is 512B and will be used for SG which are not "4K
aligned".


Sorry but I had overlooked the bounce buffer patch. Regarding page 
sizes: is an InfiniBand HCA required to support a page size of 512 bytes 
? To me it seems like the smallest page size supported by e.g. the 
ocrdma driver is 4KB. From ocrdma_query_device():


attr->page_size_cap = 0x000;

Still regarding page sizes: shouldn't ib_alloc_fast_reg_page_list() and 
ib_alloc_fast_reg_mr() multiply the SG list length by PAGE_SIZE / 
SIZE_4K to compensate for page size differences on architectures where 
virtual memory pages are larger than 4KB ?


Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-23 Thread Sagi Grimberg

On 7/23/2013 2:58 PM, Bart Van Assche wrote:

On 07/22/13 15:11, Sagi Grimberg wrote:

So just to clarify the flow:
. at connection establishment allocate pool of fastreg descriptors
. upon each IOP take a fastreg descriptor from the pool
 . if it is not invalidated - invalidate it.
 . register using FRWR.
. when cleanup_task is called - just return the fastreg descriptor to
the pool.
. at connection teardown free all resources.
Still to come:
. upon each IOP response, check if the target used remote invalidate -
if so mark relevant fastreg as valid.


Hello Sagi and Or,

Thanks for the clarifications. I have one more question though. My 
interpretation of section "10.6 Memory Management" in the IB 
specification is that memory registration maps a memory region that 
either has contiguous virtual addresses or contiguous physical 
addresses. However, there is no such requirement for an sg-list. As an 
example, for direct I/O to a block device with a sector size of 512 
bytes it is only required that I/O occurs in multiples of 512 bytes 
and from memory aligned on 512-byte boundaries. So the use of direct 
I/O can result in an sg-list where the second and subsequent sg-list 
elements have a non-zero offset. Do you agree with this ? Are such 
sg-lists mapped correctly by the FRWR code ?


Bart.



Hey Bart,

You are on the money with this observation, like FMRs, FRWR cannot 
register any arbitrary SG-list. You have the same limitations.
Unlike SRP where the initiator will use multiple FMRs to register such 
"unaligned" SG-lists,
iSER uses a bounce buffer to copy the data to a nice physically 
contiguous memory area (see patch 5/7 fall_to_bounce_buf routine), thus 
will pass a single R_Key for each transaction.
An equivalent FRWR implementation for SRP will also use multiple FRWRs 
in-order to register such "un-aligned" SG-lists and publish the R_Keys 
in ib_sge.


Hope this helps,

-Sagi
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-23 Thread Or Gerlitz
On Tue, Jul 23, 2013 at 2:58 PM, Bart Van Assche  wrote:
>
> [...]
> Hello Sagi and Or,
>
> Thanks for the clarifications. I have one more question though. My 
> interpretation of section "10.6 Memory Management" in the IB specification is 
> that memory registration maps a memory region that either has contiguous 
> virtual addresses or contiguous physical addresses. However, there is no such 
> requirement for an sg-list. As an example, for direct I/O to a block device 
> with a sector size of 512 bytes it is only required that I/O occurs in 
> multiples of 512 bytes and from memory aligned on 512-byte boundaries. So the 
> use of direct I/O can result in an sg-list where the second and subsequent 
> sg-list elements have a non-zero offset. Do you agree with this ?



YES, this can happen.



>
> Are such sg-lists mapped correctly by the FRWR code ?



Bart, iSER's FMR and FRWR code works under the assumption that an SG
list is "4K aligned". For SGs which don't obey that assumption we're
using bounce buffer.

Note that the SG "page" size used by FMRs/FRWRs doesn't have to be 1:1
with the OS page size, so in that respect && down the road, we will
get rid of the bounce buffer thing with having another FMR/FRWR pool
whose "page" size is 512B and will be used for SG which are not "4K
aligned".

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-23 Thread Bart Van Assche

On 07/22/13 15:11, Sagi Grimberg wrote:

So just to clarify the flow:
. at connection establishment allocate pool of fastreg descriptors
. upon each IOP take a fastreg descriptor from the pool
 . if it is not invalidated - invalidate it.
 . register using FRWR.
. when cleanup_task is called - just return the fastreg descriptor to
the pool.
. at connection teardown free all resources.
Still to come:
. upon each IOP response, check if the target used remote invalidate -
if so mark relevant fastreg as valid.


Hello Sagi and Or,

Thanks for the clarifications. I have one more question though. My 
interpretation of section "10.6 Memory Management" in the IB 
specification is that memory registration maps a memory region that 
either has contiguous virtual addresses or contiguous physical 
addresses. However, there is no such requirement for an sg-list. As an 
example, for direct I/O to a block device with a sector size of 512 
bytes it is only required that I/O occurs in multiples of 512 bytes and 
from memory aligned on 512-byte boundaries. So the use of direct I/O can 
result in an sg-list where the second and subsequent sg-list elements 
have a non-zero offset. Do you agree with this ? Are such sg-lists 
mapped correctly by the FRWR code ?


Bart.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-22 Thread Or Gerlitz
On Mon, Jul 22, 2013 at 2:46 PM, Bart Van Assche  wrote:
[...]
> What will happen if a new FRWR is submitted with an rkey that is still valid ?

The HW will fail this WR, we must invalidate the mapping before re-using the MR.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-22 Thread Sagi Grimberg

On 7/22/2013 2:46 PM, Bart Van Assche wrote:

On 07/18/13 15:25, Or Gerlitz wrote:

+static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
+struct iser_conn *ib_conn,
+struct iser_regd_buf *regd_buf,
+u32 offset, unsigned int data_size,
+unsigned int page_list_len)
+{
+struct ib_send_wr fastreg_wr, inv_wr;
+struct ib_send_wr *bad_wr, *wr = NULL;
+u8 key;
+int ret;
+
+if (!desc->valid) {
+memset(&inv_wr, 0, sizeof(inv_wr));
+inv_wr.opcode = IB_WR_LOCAL_INV;
+inv_wr.send_flags = IB_SEND_SIGNALED;
+inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+wr = &inv_wr;
+/* Bump the key */
+key = (u8)(desc->data_mr->rkey & 0x00FF);
+ib_update_fast_reg_key(desc->data_mr, ++key);
+}
+
+/* Prepare FASTREG WR */
+memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+fastreg_wr.send_flags = IB_SEND_SIGNALED;
+fastreg_wr.wr.fast_reg.iova_start = 
desc->data_frpl->page_list[0] + offset;

+fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
+fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
+fastreg_wr.wr.fast_reg.length = data_size;
+fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
+   IB_ACCESS_REMOTE_WRITE |
+   IB_ACCESS_REMOTE_READ);


Hello Sagi,

If I interpret the above code correctly the rkey used in the previous 
FRWR is invalidated as soon as a new FRWR is queued. Does this mean 
that the iSER initiator limits queue depth to one ?


Another question: is it on purpose that iscsi_iser_cleanup_task() does 
not invalidate an rkey if a command has been aborted successfully ? A 
conforming iSER target does not send a response for aborted commands. 
Will successful command abortion result in the rkey not being 
invalidated ? What will happen if a new FRWR is submitted with an rkey 
that is still valid ?


Thanks,

Bart.



Hey Bart,

You interpret correctly, iSER will local invalidate the rkey just before 
re-using it (conditioned that it is not previously invalidated - 
remotely by the target).
This code is still missing the remote invalidate part, then iSER 
initiator will publish its remote invalidate support and in case the 
target may remote invalidate (seen in the RSP completion) the rkey
and the Initiator will pick it up in the RSP completion and mark the 
associated MR as valid (valid for use again).


Not sure what you meant in your question, but this does not mean that 
iSER initiator limits the queue depth to 1,
initiator manages a pool of fastreg descriptors of size == max queued 
commands (per connection) each containing an ib_mr,
For each concurrent IOP it takes a fastreg descriptor from the pool, and 
uses it for registration (if marked as not valid - will local invalidate 
the rkey and then use it for registration).
When cleanup_task -> iser_task_rdma_finalize -> iser_unreg_rdma_mem is 
called,
it just returns the fastreg to the pool (without local invalidate - as 
it is done when it will be reused).


The reason I chose to do that is that if I locally invalidate the rkey 
upon task cleanup then
Only after the completion I'm allowed to return it back to the pool 
(only then I know it's ready for reuse) and assuming that I still want 
to evacuate the task and not wait in my fast-path,
I may end-up in certain conditions in a situation that I have no 
resources to handle the next IOP since all MRs are waiting for LOCAL_INV 
completions.
A possible solution here was to heuristically use a larger pool - but I 
wanted to avoid that...


So just to clarify the flow:
. at connection establishment allocate pool of fastreg descriptors
. upon each IOP take a fastreg descriptor from the pool
. if it is not invalidated - invalidate it.
. register using FRWR.
. when cleanup_task is called - just return the fastreg descriptor to 
the pool.

. at connection teardown free all resources.
Still to come:
. upon each IOP response, check if the target used remote invalidate - 
if so mark relevant fastreg as valid.


Hope this helps.

-Sagi
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-22 Thread Bart Van Assche

On 07/18/13 15:25, Or Gerlitz wrote:

+static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
+   struct iser_conn *ib_conn,
+   struct iser_regd_buf *regd_buf,
+   u32 offset, unsigned int data_size,
+   unsigned int page_list_len)
+{
+   struct ib_send_wr fastreg_wr, inv_wr;
+   struct ib_send_wr *bad_wr, *wr = NULL;
+   u8 key;
+   int ret;
+
+   if (!desc->valid) {
+   memset(&inv_wr, 0, sizeof(inv_wr));
+   inv_wr.opcode = IB_WR_LOCAL_INV;
+   inv_wr.send_flags = IB_SEND_SIGNALED;
+   inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+   wr = &inv_wr;
+   /* Bump the key */
+   key = (u8)(desc->data_mr->rkey & 0x00FF);
+   ib_update_fast_reg_key(desc->data_mr, ++key);
+   }
+
+   /* Prepare FASTREG WR */
+   memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+   fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+   fastreg_wr.send_flags = IB_SEND_SIGNALED;
+   fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + 
offset;
+   fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
+   fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+   fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
+   fastreg_wr.wr.fast_reg.length = data_size;
+   fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+   fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
+  IB_ACCESS_REMOTE_WRITE |
+  IB_ACCESS_REMOTE_READ);


Hello Sagi,

If I interpret the above code correctly the rkey used in the previous 
FRWR is invalidated as soon as a new FRWR is queued. Does this mean that 
the iSER initiator limits queue depth to one ?


Another question: is it on purpose that iscsi_iser_cleanup_task() does 
not invalidate an rkey if a command has been aborted successfully ? A 
conforming iSER target does not send a response for aborted commands. 
Will successful command abortion result in the rkey not being 
invalidated ? What will happen if a new FRWR is submitted with an rkey 
that is still valid ?


Thanks,

Bart.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)

2013-07-18 Thread Or Gerlitz
From: Sagi Grimberg 

Newer HCAs and Virtual functions may not support FMRs but rather a fast
registration model, which we call FRWR - "Fast Registration Work Requests".

This model was introduced in 00f7ec36c "RDMA/core: Add memory management
extensions support" and works when the IB device supports the
IB_DEVICE_MEM_MGT_EXTENSIONS capability.

Upon creating the iser device iser will test wether the HCA supports FMRs.
if no support for FMRs, check if IB_DEVICE_MEM_MGT_EXTENSIONS is supported and
assign function handles that handle fast registration and allocation of
appropriate resources (fast_reg descriptors).

Registration is done using posting IB_WR_FAST_REG_MR to the QP and
invalidations using posting IB_WR_LOCAL_INV.

Signed-off-by: Sagi Grimberg 
Signed-off-by: Or Gerlitz 
---
 drivers/infiniband/ulp/iser/iscsi_iser.h  |   21 -
 drivers/infiniband/ulp/iser/iser_memory.c |  140 -
 drivers/infiniband/ulp/iser/iser_verbs.c  |  138 ++--
 3 files changed, 287 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h 
b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 75c5352..6791402 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -211,7 +211,7 @@ struct iser_mem_reg {
u64  va;
u64  len;
void *mem_h;
-   int  is_fmr;
+   int  is_mr;
 };
 
 struct iser_regd_buf {
@@ -277,6 +277,15 @@ struct iser_device {
enum iser_data_dir 
cmd_dir);
 };
 
+struct fast_reg_descriptor {
+   struct list_head  list;
+   /* For fast registration - FRWR */
+   struct ib_mr *data_mr;
+   struct ib_fast_reg_page_list *data_frpl;
+   /* Valid for fast registration flag */
+   bool  valid;
+};
+
 struct iser_conn {
struct iscsi_iser_conn   *iser_conn; /* iser conn for upcalls  */
struct iscsi_endpoint*ep;
@@ -307,6 +316,10 @@ struct iser_conn {
struct iser_page_vec*page_vec; /* represents SG to 
fmr maps*
* maps serialized 
as tx is*/
} fmr;
+   struct {
+   struct list_headpool;
+   int pool_size;
+   } frwr;
} fastreg;
 };
 
@@ -393,6 +406,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task 
*task,
 
 int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
   enum iser_data_dir cmd_dir);
+int  iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
+   enum iser_data_dir cmd_dir);
 
 int  iser_connect(struct iser_conn   *ib_conn,
  struct sockaddr_in *src_addr,
@@ -405,6 +420,8 @@ int  iser_reg_page_vec(struct iser_conn *ib_conn,
 
 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+enum iser_data_dir cmd_dir);
 
 int  iser_post_recvl(struct iser_conn *ib_conn);
 int  iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -421,4 +438,6 @@ int  iser_initialize_task_headers(struct iscsi_task *task,
 int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session 
*session);
 int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
 void iser_free_fmr_pool(struct iser_conn *ib_conn);
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_frwr_pool(struct iser_conn *ib_conn);
 #endif
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c 
b/drivers/infiniband/ulp/iser/iser_memory.c
index 1985e90..1ce0c97 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -395,8 +395,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf = &iser_task->rdma_regd[cmd_dir];
 
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
-   if (aligned_len != mem->dma_nents ||
-   (!ib_conn->fastreg.fmr.pool && mem->dma_nents > 1)) {
+   if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev,
 cmd_dir, aligned_len);
if (err) {
@@ -414,7 +413,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-   regd_buf->reg.is_fmr = 0;
+   regd_buf->reg.is_mr = 0;
 
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
 "va: 0x%08lX sz: %ld]\n",
@@ -444,3 +443,138 @@ int i