Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API
out_list_err: - rc = PTR_ERR(f->fr_pgl); + rc = -ENOMEM; dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n”, Should you update this debugging message? Yes I will. + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { The basic logic looks OK. But signed v. unsigned comparison here? Is there a guarantee that the maximum value sg_nents can have is INT_MAX? i and n are signed, sg_nents is unsigned. I’d prefer to have the variable signage all agree. Since we’re counting stuff, maybe unsigned all around is a better choice? (rc should remain signed). OK, I'll make i, n, len unsigned integers. If ib_map_mr_sg can return a negative value, maybe “rc = ib_map_mr_sg” is more clear. Is n used anywhere else? Nop. Use %u formatter for displaying unsigned variables. + pr_err("RPC: %s: failed to map mr %p (%d/%d)\n", + __func__, frmr->fr_mr, n, frmr->sg_nents); + rc = n < 0 ? n : -EINVAL; + goto out_senderr; Is frmr->sg_nents set correctly here for the ib_dma_unmap_sg() call in the error exit? Maybe you want to use the value in “n” instead (as long as it’s positive, of course). Umm, I think that frmr->sg_nents is assigned before the unmap is even reachable (we assign before mapping because we won't take partial maapings at least for now). I'll be waiting for more comments before posting v4. Cheers, Sagi. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API
On Thu, Oct 8, 2015 at 10:50 AM, Sagi Grimberg wrote: > Instead of maintaining a fastreg page list, keep an sg table > and convert an array of pages to a sg list. Then call ib_map_mr_sg > and construct ib_reg_wr. > > Note that the next step would be to have NFS work with sg lists > as it maps well to sk_frags (see comment from hch > http://marc.info/?l=linux-rdma&m=143677002622296&w=2). I don't see the point to keep the 2nd paragraph in the upstream git log forever, it deserves to be after the --- line such that when the maintainer uses git am this stays out, so apply this in the respin you'd be doing to address Chuck's comments. Or. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API
> On Oct 8, 2015, at 3:50 AM, Sagi Grimberg wrote: > > Instead of maintaining a fastreg page list, keep an sg table > and convert an array of pages to a sg list. Then call ib_map_mr_sg > and construct ib_reg_wr. > > Note that the next step would be to have NFS work with sg lists > as it maps well to sk_frags (see comment from hch > http://marc.info/?l=linux-rdma&m=143677002622296&w=2). > > Signed-off-by: Sagi Grimberg > Acked-by: Christoph Hellwig > Tested-by: Steve Wise > Tested-by: Selvin Xavier Comments below. > --- > net/sunrpc/xprtrdma/frwr_ops.c | 112 +++- > net/sunrpc/xprtrdma/xprt_rdma.h | 3 +- > 2 files changed, 67 insertions(+), 48 deletions(-) > > diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c > index 0d2f46f600b6..3c89f8052786 100644 > --- a/net/sunrpc/xprtrdma/frwr_ops.c > +++ b/net/sunrpc/xprtrdma/frwr_ops.c > @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, > struct ib_device *device, > f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); > if (IS_ERR(f->fr_mr)) > goto out_mr_err; > - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); > - if (IS_ERR(f->fr_pgl)) > + > + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); > + if (!f->sg) > goto out_list_err; > + > + sg_init_table(f->sg, depth); > + > return 0; > > out_mr_err: > @@ -163,7 +167,7 @@ out_mr_err: > return rc; > > out_list_err: > - rc = PTR_ERR(f->fr_pgl); > + rc = -ENOMEM; > dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n”, Should you update this debugging message? > __func__, rc); > ib_dereg_mr(f->fr_mr); > @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r) > if (rc) > dprintk("RPC: %s: ib_dereg_mr status %i\n", > __func__, rc); > - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); > + kfree(r->r.frmr.sg); > } > > static int > @@ -312,14 +316,11 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct > rpcrdma_mr_seg *seg, > struct rpcrdma_mw *mw; > struct rpcrdma_frmr *frmr; > struct ib_mr *mr; > - struct ib_fast_reg_wr fastreg_wr; > + struct ib_reg_wr reg_wr; > struct ib_send_wr *bad_wr; > + unsigned int dma_nents; > u8 key; > - int len, pageoff; > - int i, rc; > - int seg_len; > - u64 pa; > - int page_no; > + int i, rc, len, n; > > mw = seg1->rl_mw; > seg1->rl_mw = NULL; > @@ -332,64 +333,81 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct > rpcrdma_mr_seg *seg, > } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); > frmr = &mw->r.frmr; > frmr->fr_state = FRMR_IS_VALID; > + mr = frmr->fr_mr; > > - pageoff = offset_in_page(seg1->mr_offset); > - seg1->mr_offset -= pageoff; /* start of page */ > - seg1->mr_len += pageoff; > - len = -pageoff; > if (nsegs > ia->ri_max_frmr_depth) > nsegs = ia->ri_max_frmr_depth; > > - for (page_no = i = 0; i < nsegs;) { > - rpcrdma_map_one(device, seg, direction); > - pa = seg->mr_dma; > - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { > - frmr->fr_pgl->page_list[page_no++] = pa; > - pa += PAGE_SIZE; > - } > + for (len = 0, i = 0; i < nsegs;) { > + if (seg->mr_page) > + sg_set_page(&frmr->sg[i], > + seg->mr_page, > + seg->mr_len, > + offset_in_page(seg->mr_offset)); > + else > + sg_set_buf(&frmr->sg[i], seg->mr_offset, > +seg->mr_len); > + > len += seg->mr_len; > ++seg; > ++i; > + > /* Check for holes */ > if ((i < nsegs && offset_in_page(seg->mr_offset)) || > offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) > break; > } > + frmr->sg_nents = i; > + > + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); > + if (!dma_nents) { > + pr_err("RPC: %s: failed to dma map sg %p sg_nents %d\n", > +__func__, frmr->sg, frmr->sg_nents); > + return -ENOMEM; > + } > + > + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); > + if (unlikely(n != frmr->sg_nents)) { The basic logic looks OK. But signed v. unsigned comparison here? Is there a guarantee that the maximum value sg_nents can have is INT_MAX? i and n are signed, sg_nents is unsigned. I’d prefer to have the variable signage all agree. Since we’re counting stuff, maybe unsigned all around is a better choice? (rc should remain signed). If ib_map_mr_sg can return a negative value, maybe “rc = ib_map_mr_sg”
Re: [PATCH v3 12/26] xprtrdma: Port to new memory registration API
Hi Sagi, On 10/08/2015 03:50 AM, Sagi Grimberg wrote: > Instead of maintaining a fastreg page list, keep an sg table > and convert an array of pages to a sg list. Then call ib_map_mr_sg > and construct ib_reg_wr. > > Note that the next step would be to have NFS work with sg lists > as it maps well to sk_frags (see comment from hch > http://marc.info/?l=linux-rdma&m=143677002622296&w=2). Looks good to me! Acked-by: Anna Schumaker > > Signed-off-by: Sagi Grimberg > Acked-by: Christoph Hellwig > Tested-by: Steve Wise > Tested-by: Selvin Xavier > --- > net/sunrpc/xprtrdma/frwr_ops.c | 112 > +++- > net/sunrpc/xprtrdma/xprt_rdma.h | 3 +- > 2 files changed, 67 insertions(+), 48 deletions(-) > > diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c > index 0d2f46f600b6..3c89f8052786 100644 > --- a/net/sunrpc/xprtrdma/frwr_ops.c > +++ b/net/sunrpc/xprtrdma/frwr_ops.c > @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, > struct ib_device *device, > f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); > if (IS_ERR(f->fr_mr)) > goto out_mr_err; > - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); > - if (IS_ERR(f->fr_pgl)) > + > + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); > + if (!f->sg) > goto out_list_err; > + > + sg_init_table(f->sg, depth); > + > return 0; > > out_mr_err: > @@ -163,7 +167,7 @@ out_mr_err: > return rc; > > out_list_err: > - rc = PTR_ERR(f->fr_pgl); > + rc = -ENOMEM; > dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", > __func__, rc); > ib_dereg_mr(f->fr_mr); > @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r) > if (rc) > dprintk("RPC: %s: ib_dereg_mr status %i\n", > __func__, rc); > - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); > + kfree(r->r.frmr.sg); > } > > static int > @@ -312,14 +316,11 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct > rpcrdma_mr_seg *seg, > struct rpcrdma_mw *mw; > struct rpcrdma_frmr *frmr; > struct ib_mr *mr; > - struct ib_fast_reg_wr fastreg_wr; > + struct ib_reg_wr reg_wr; > struct ib_send_wr *bad_wr; > + unsigned int dma_nents; > u8 key; > - int len, pageoff; > - int i, rc; > - int seg_len; > - u64 pa; > - int page_no; > + int i, rc, len, n; > > mw = seg1->rl_mw; > seg1->rl_mw = NULL; > @@ -332,64 +333,81 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct > rpcrdma_mr_seg *seg, > } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); > frmr = &mw->r.frmr; > frmr->fr_state = FRMR_IS_VALID; > + mr = frmr->fr_mr; > > - pageoff = offset_in_page(seg1->mr_offset); > - seg1->mr_offset -= pageoff; /* start of page */ > - seg1->mr_len += pageoff; > - len = -pageoff; > if (nsegs > ia->ri_max_frmr_depth) > nsegs = ia->ri_max_frmr_depth; > > - for (page_no = i = 0; i < nsegs;) { > - rpcrdma_map_one(device, seg, direction); > - pa = seg->mr_dma; > - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { > - frmr->fr_pgl->page_list[page_no++] = pa; > - pa += PAGE_SIZE; > - } > + for (len = 0, i = 0; i < nsegs;) { > + if (seg->mr_page) > + sg_set_page(&frmr->sg[i], > + seg->mr_page, > + seg->mr_len, > + offset_in_page(seg->mr_offset)); > + else > + sg_set_buf(&frmr->sg[i], seg->mr_offset, > +seg->mr_len); > + > len += seg->mr_len; > ++seg; > ++i; > + > /* Check for holes */ > if ((i < nsegs && offset_in_page(seg->mr_offset)) || > offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) > break; > } > + frmr->sg_nents = i; > + > + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); > + if (!dma_nents) { > + pr_err("RPC: %s: failed to dma map sg %p sg_nents %d\n", > +__func__, frmr->sg, frmr->sg_nents); > + return -ENOMEM; > + } > + > + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); > + if (unlikely(n != frmr->sg_nents)) { > + pr_err("RPC: %s: failed to map mr %p (%d/%d)\n", > +__func__, frmr->fr_mr, n, frmr->sg_nents); > + rc = n < 0 ? n : -EINVAL; > + goto out_senderr; > + } > + > dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", > - __func__, mw, i, len); > - > - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); > - f
[PATCH v3 12/26] xprtrdma: Port to new memory registration API
Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. Note that the next step would be to have NFS work with sg lists as it maps well to sk_frags (see comment from hch http://marc.info/?l=linux-rdma&m=143677002622296&w=2). Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier --- net/sunrpc/xprtrdma/frwr_ops.c | 112 +++- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +- 2 files changed, 67 insertions(+), 48 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 0d2f46f600b6..3c89f8052786 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); - if (IS_ERR(f->fr_pgl)) + + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); + if (!f->sg) goto out_list_err; + + sg_init_table(f->sg, depth); + return 0; out_mr_err: @@ -163,7 +167,7 @@ out_mr_err: return rc; out_list_err: - rc = PTR_ERR(f->fr_pgl); + rc = -ENOMEM; dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", __func__, rc); ib_dereg_mr(f->fr_mr); @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r) if (rc) dprintk("RPC: %s: ib_dereg_mr status %i\n", __func__, rc); - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + kfree(r->r.frmr.sg); } static int @@ -312,14 +316,11 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; struct ib_send_wr *bad_wr; + unsigned int dma_nents; u8 key; - int len, pageoff; - int i, rc; - int seg_len; - u64 pa; - int page_no; + int i, rc, len, n; mw = seg1->rl_mw; seg1->rl_mw = NULL; @@ -332,64 +333,81 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); frmr = &mw->r.frmr; frmr->fr_state = FRMR_IS_VALID; + mr = frmr->fr_mr; - pageoff = offset_in_page(seg1->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; - for (page_no = i = 0; i < nsegs;) { - rpcrdma_map_one(device, seg, direction); - pa = seg->mr_dma; - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { - frmr->fr_pgl->page_list[page_no++] = pa; - pa += PAGE_SIZE; - } + for (len = 0, i = 0; i < nsegs;) { + if (seg->mr_page) + sg_set_page(&frmr->sg[i], + seg->mr_page, + seg->mr_len, + offset_in_page(seg->mr_offset)); + else + sg_set_buf(&frmr->sg[i], seg->mr_offset, + seg->mr_len); + len += seg->mr_len; ++seg; ++i; + /* Check for holes */ if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } + frmr->sg_nents = i; + + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); + if (!dma_nents) { + pr_err("RPC: %s: failed to dma map sg %p sg_nents %d\n", + __func__, frmr->sg, frmr->sg_nents); + return -ENOMEM; + } + + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("RPC: %s: failed to map mr %p (%d/%d)\n", + __func__, frmr->fr_mr, n, frmr->sg_nents); + rc = n < 0 ? n : -EINVAL; + goto out_senderr; + } + dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", - __func__, mw, i, len); - - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.iova_start = seg1->mr_dma + pageoff; - fastreg_wr.page_list = frmr->fr_pgl; -