Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API

2015-10-09 Thread Sagi Grimberg



out_list_err:
-   rc = PTR_ERR(f->fr_pgl);
+   rc = -ENOMEM;
dprintk("RPC:   %s: ib_alloc_fast_reg_page_list status %i\n”,


Should you update this debugging message?


Yes I will.


+   n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+   if (unlikely(n != frmr->sg_nents)) {


The basic logic looks OK. But signed v. unsigned comparison here?
Is there a guarantee that the maximum value sg_nents can have is
INT_MAX?

i and n are signed, sg_nents is unsigned. I’d prefer to have
the variable signage all agree. Since we’re counting stuff,
maybe unsigned all around is a better choice? (rc should
remain signed).


OK, I'll make i, n, len unsigned integers.



If ib_map_mr_sg can return a negative value, maybe “rc = ib_map_mr_sg”
is more clear. Is n used anywhere else?


Nop.



Use %u formatter for displaying unsigned variables.



+   pr_err("RPC:   %s: failed to map mr %p (%d/%d)\n",
+  __func__, frmr->fr_mr, n, frmr->sg_nents);
+   rc = n < 0 ? n : -EINVAL;
+   goto out_senderr;


Is frmr->sg_nents set correctly here for the ib_dma_unmap_sg()
call in the error exit? Maybe you want to use the value in
“n” instead (as long as it’s positive, of course).


Umm, I think that frmr->sg_nents is assigned before the unmap is even
reachable (we assign before mapping because we won't take partial
maapings at least for now).



I'll be waiting for more comments before posting v4.

Cheers,
Sagi.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API

2015-10-08 Thread Chuck Lever

> On Oct 8, 2015, at 3:50 AM, Sagi Grimberg  wrote:
> 
> Instead of maintaining a fastreg page list, keep an sg table
> and convert an array of pages to a sg list. Then call ib_map_mr_sg
> and construct ib_reg_wr.
> 
> Note that the next step would be to have NFS work with sg lists
> as it maps well to sk_frags (see comment from hch
> http://marc.info/?l=linux-rdma=143677002622296=2).
> 
> Signed-off-by: Sagi Grimberg 
> Acked-by: Christoph Hellwig 
> Tested-by: Steve Wise 
> Tested-by: Selvin Xavier 

Comments below.


> ---
> net/sunrpc/xprtrdma/frwr_ops.c  | 112 +++-
> net/sunrpc/xprtrdma/xprt_rdma.h |   3 +-
> 2 files changed, 67 insertions(+), 48 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 0d2f46f600b6..3c89f8052786 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, 
> struct ib_device *device,
>   f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
>   if (IS_ERR(f->fr_mr))
>   goto out_mr_err;
> - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
> - if (IS_ERR(f->fr_pgl))
> +
> + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
> + if (!f->sg)
>   goto out_list_err;
> +
> + sg_init_table(f->sg, depth);
> +
>   return 0;
> 
> out_mr_err:
> @@ -163,7 +167,7 @@ out_mr_err:
>   return rc;
> 
> out_list_err:
> - rc = PTR_ERR(f->fr_pgl);
> + rc = -ENOMEM;
>   dprintk("RPC:   %s: ib_alloc_fast_reg_page_list status %i\n”,

Should you update this debugging message?


>   __func__, rc);
>   ib_dereg_mr(f->fr_mr);
> @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
>   if (rc)
>   dprintk("RPC:   %s: ib_dereg_mr status %i\n",
>   __func__, rc);
> - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> + kfree(r->r.frmr.sg);
> }
> 
> static int
> @@ -312,14 +316,11 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg,
>   struct rpcrdma_mw *mw;
>   struct rpcrdma_frmr *frmr;
>   struct ib_mr *mr;
> - struct ib_fast_reg_wr fastreg_wr;
> + struct ib_reg_wr reg_wr;
>   struct ib_send_wr *bad_wr;
> + unsigned int dma_nents;
>   u8 key;
> - int len, pageoff;
> - int i, rc;
> - int seg_len;
> - u64 pa;
> - int page_no;
> + int i, rc, len, n;
> 
>   mw = seg1->rl_mw;
>   seg1->rl_mw = NULL;
> @@ -332,64 +333,81 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg,
>   } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
>   frmr = >r.frmr;
>   frmr->fr_state = FRMR_IS_VALID;
> + mr = frmr->fr_mr;
> 
> - pageoff = offset_in_page(seg1->mr_offset);
> - seg1->mr_offset -= pageoff; /* start of page */
> - seg1->mr_len += pageoff;
> - len = -pageoff;
>   if (nsegs > ia->ri_max_frmr_depth)
>   nsegs = ia->ri_max_frmr_depth;
> 
> - for (page_no = i = 0; i < nsegs;) {
> - rpcrdma_map_one(device, seg, direction);
> - pa = seg->mr_dma;
> - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
> - frmr->fr_pgl->page_list[page_no++] = pa;
> - pa += PAGE_SIZE;
> - }
> + for (len = 0, i = 0; i < nsegs;) {
> + if (seg->mr_page)
> + sg_set_page(>sg[i],
> + seg->mr_page,
> + seg->mr_len,
> + offset_in_page(seg->mr_offset));
> + else
> + sg_set_buf(>sg[i], seg->mr_offset,
> +seg->mr_len);
> +
>   len += seg->mr_len;
>   ++seg;
>   ++i;
> +
>   /* Check for holes */
>   if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>   break;
>   }
> + frmr->sg_nents = i;
> +
> + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
> + if (!dma_nents) {
> + pr_err("RPC:   %s: failed to dma map sg %p sg_nents %d\n",
> +__func__, frmr->sg, frmr->sg_nents);
> + return -ENOMEM;
> + }
> +
> + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
> + if (unlikely(n != frmr->sg_nents)) {

The basic logic looks OK. But signed v. unsigned comparison here?
Is there a guarantee that the maximum value sg_nents can have is
INT_MAX?

i and n are signed, sg_nents is unsigned. I’d prefer to have
the variable signage all agree. Since we’re counting stuff,
maybe unsigned all around is a better choice? (rc 

Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API

2015-10-08 Thread Anna Schumaker
Hi Sagi,

On 10/08/2015 03:50 AM, Sagi Grimberg wrote:
> Instead of maintaining a fastreg page list, keep an sg table
> and convert an array of pages to a sg list. Then call ib_map_mr_sg
> and construct ib_reg_wr.
> 
> Note that the next step would be to have NFS work with sg lists
> as it maps well to sk_frags (see comment from hch
> http://marc.info/?l=linux-rdma=143677002622296=2).

Looks good to me!

Acked-by: Anna Schumaker 
> 
> Signed-off-by: Sagi Grimberg 
> Acked-by: Christoph Hellwig 
> Tested-by: Steve Wise 
> Tested-by: Selvin Xavier 
> ---
>  net/sunrpc/xprtrdma/frwr_ops.c  | 112 
> +++-
>  net/sunrpc/xprtrdma/xprt_rdma.h |   3 +-
>  2 files changed, 67 insertions(+), 48 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 0d2f46f600b6..3c89f8052786 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, 
> struct ib_device *device,
>   f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
>   if (IS_ERR(f->fr_mr))
>   goto out_mr_err;
> - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
> - if (IS_ERR(f->fr_pgl))
> +
> + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
> + if (!f->sg)
>   goto out_list_err;
> +
> + sg_init_table(f->sg, depth);
> +
>   return 0;
>  
>  out_mr_err:
> @@ -163,7 +167,7 @@ out_mr_err:
>   return rc;
>  
>  out_list_err:
> - rc = PTR_ERR(f->fr_pgl);
> + rc = -ENOMEM;
>   dprintk("RPC:   %s: ib_alloc_fast_reg_page_list status %i\n",
>   __func__, rc);
>   ib_dereg_mr(f->fr_mr);
> @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
>   if (rc)
>   dprintk("RPC:   %s: ib_dereg_mr status %i\n",
>   __func__, rc);
> - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> + kfree(r->r.frmr.sg);
>  }
>  
>  static int
> @@ -312,14 +316,11 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg,
>   struct rpcrdma_mw *mw;
>   struct rpcrdma_frmr *frmr;
>   struct ib_mr *mr;
> - struct ib_fast_reg_wr fastreg_wr;
> + struct ib_reg_wr reg_wr;
>   struct ib_send_wr *bad_wr;
> + unsigned int dma_nents;
>   u8 key;
> - int len, pageoff;
> - int i, rc;
> - int seg_len;
> - u64 pa;
> - int page_no;
> + int i, rc, len, n;
>  
>   mw = seg1->rl_mw;
>   seg1->rl_mw = NULL;
> @@ -332,64 +333,81 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg,
>   } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
>   frmr = >r.frmr;
>   frmr->fr_state = FRMR_IS_VALID;
> + mr = frmr->fr_mr;
>  
> - pageoff = offset_in_page(seg1->mr_offset);
> - seg1->mr_offset -= pageoff; /* start of page */
> - seg1->mr_len += pageoff;
> - len = -pageoff;
>   if (nsegs > ia->ri_max_frmr_depth)
>   nsegs = ia->ri_max_frmr_depth;
>  
> - for (page_no = i = 0; i < nsegs;) {
> - rpcrdma_map_one(device, seg, direction);
> - pa = seg->mr_dma;
> - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
> - frmr->fr_pgl->page_list[page_no++] = pa;
> - pa += PAGE_SIZE;
> - }
> + for (len = 0, i = 0; i < nsegs;) {
> + if (seg->mr_page)
> + sg_set_page(>sg[i],
> + seg->mr_page,
> + seg->mr_len,
> + offset_in_page(seg->mr_offset));
> + else
> + sg_set_buf(>sg[i], seg->mr_offset,
> +seg->mr_len);
> +
>   len += seg->mr_len;
>   ++seg;
>   ++i;
> +
>   /* Check for holes */
>   if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>   break;
>   }
> + frmr->sg_nents = i;
> +
> + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
> + if (!dma_nents) {
> + pr_err("RPC:   %s: failed to dma map sg %p sg_nents %d\n",
> +__func__, frmr->sg, frmr->sg_nents);
> + return -ENOMEM;
> + }
> +
> + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
> + if (unlikely(n != frmr->sg_nents)) {
> + pr_err("RPC:   %s: failed to map mr %p (%d/%d)\n",
> +__func__, frmr->fr_mr, n, frmr->sg_nents);
> + rc = n < 0 ? n : -EINVAL;
> + goto out_senderr;
> + }
> +
>   dprintk("RPC:   %s: Using frmr %p to map %d segments (%d bytes)\n",
> -   

Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API

2015-10-08 Thread Or Gerlitz
On Thu, Oct 8, 2015 at 10:50 AM, Sagi Grimberg  wrote:
> Instead of maintaining a fastreg page list, keep an sg table
> and convert an array of pages to a sg list. Then call ib_map_mr_sg
> and construct ib_reg_wr.
>
> Note that the next step would be to have NFS work with sg lists
> as it maps well to sk_frags (see comment from hch
> http://marc.info/?l=linux-rdma=143677002622296=2).


I don't see the point to keep the 2nd paragraph in the upstream git log
forever, it deserves to be after the --- line such that when the maintainer
uses git am this stays out, so apply this in the respin you'd be doing
to address Chuck's comments.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html