On Jul 22, 2015, at 2:55 AM, Sagi Grimberg <[email protected]> wrote:

> Signed-off-by: Sagi Grimberg <[email protected]>
> ---
> net/sunrpc/xprtrdma/frwr_ops.c  | 80 ++++++++++++++++++++++-------------------
> net/sunrpc/xprtrdma/xprt_rdma.h |  4 ++-
> 2 files changed, 47 insertions(+), 37 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
> index 517efed..e28246b 100644
> --- a/net/sunrpc/xprtrdma/frwr_ops.c
> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
> @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, 
> struct ib_device *device,
>       f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_FAST_REG, depth, 0);
>       if (IS_ERR(f->fr_mr))
>               goto out_mr_err;
> -     f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
> -     if (IS_ERR(f->fr_pgl))
> +
> +     f->sg = kcalloc(sizeof(*f->sg), depth, GFP_KERNEL);
> +     if (IS_ERR(f->sg))
>               goto out_list_err;
> +
> +     sg_init_table(f->sg, depth);
> +
>       return 0;
> 
> out_mr_err:
> @@ -163,7 +167,7 @@ out_mr_err:
>       return rc;
> 
> out_list_err:
> -     rc = PTR_ERR(f->fr_pgl);
> +     rc = -ENOMEM;
>       dprintk("RPC:       %s: ib_alloc_fast_reg_page_list status %i\n",
>               __func__, rc);
>       ib_dereg_mr(f->fr_mr);
> @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
>       if (rc)
>               dprintk("RPC:       %s: ib_dereg_mr status %i\n",
>                       __func__, rc);
> -     ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
> +     kfree(r->r.frmr.sg);
> }
> 
> static int
> @@ -320,10 +324,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg,
>       struct ib_send_wr fastreg_wr, *bad_wr;
>       u8 key;
>       int len, pageoff;
> -     int i, rc;
> -     int seg_len;
> -     u64 pa;
> -     int page_no;
> +     int i, rc, access;
> 
>       mw = seg1->rl_mw;
>       seg1->rl_mw = NULL;
> @@ -344,39 +345,46 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg,
>       if (nsegs > ia->ri_max_frmr_depth)
>               nsegs = ia->ri_max_frmr_depth;
> 
> -     for (page_no = i = 0; i < nsegs;) {
> -             rpcrdma_map_one(device, seg, direction);
> -             pa = seg->mr_dma;
> -             for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
> -                     frmr->fr_pgl->page_list[page_no++] = pa;
> -                     pa += PAGE_SIZE;
> -             }
> +     for (i = 0; i < nsegs;) {
> +             sg_set_page(&frmr->sg[i], seg->mr_page,
> +                         seg->mr_len, offset_in_page(seg->mr_offset));

Cautionary note: here we’re dealing with both the “contiguous
set of pages” case and the “small region of bytes in a single page”
case. See rpcrdma_convert_iovs(): sometimes RPC send or receive
buffers can be registered (RDMA_NOMSG).


>               len += seg->mr_len;
> -             ++seg;
>               ++i;
> -             /* Check for holes */
> +             ++seg;
> +
> +             /* Check for holes - needed?? */
>               if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
>                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
>                       break;
>       }
> +
> +     frmr->sg_nents = i;
> +     frmr->dma_nents = ib_dma_map_sg(device, frmr->sg,
> +                                     frmr->sg_nents, direction);
> +     if (!frmr->dma_nents) {
> +             pr_err("RPC:       %s: failed to dma map sg %p sg_nents %d\n",
> +                     __func__, frmr->sg, frmr->sg_nents);
> +             return -ENOMEM;
> +     }
> +
>       dprintk("RPC:       %s: Using frmr %p to map %d segments (%d bytes)\n",
>               __func__, mw, i, len);
> 
> -     memset(&fastreg_wr, 0, sizeof(fastreg_wr));
> -     fastreg_wr.wr_id = (unsigned long)(void *)mw;
> -     fastreg_wr.opcode = IB_WR_FAST_REG_MR;
> -     fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
> -     fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
> -     fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
> -     fastreg_wr.wr.fast_reg.page_list_len = page_no;
> -     fastreg_wr.wr.fast_reg.length = len;
> -     fastreg_wr.wr.fast_reg.access_flags = writing ?
> -                             IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
> -                             IB_ACCESS_REMOTE_READ;
>       mr = frmr->fr_mr;
> +     access = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
> +                        IB_ACCESS_REMOTE_READ;
> +     rc = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, access);

I like this (and the matching ib_dma_unmap_sg). But why wouldn’t
this function be called ib_dma_map_sg() ? The name ib_map_mr_sg()
had me thinking for a moment that this API actually posted the
FASTREG WR, but I see that it doesn’t.


> +     if (rc) {
> +             pr_err("RPC:       %s: failed to map mr %p rc %d\n",
> +                     __func__, frmr->fr_mr, rc);
> +             return rc;
> +     }
> +
>       key = (u8)(mr->rkey & 0x000000FF);
>       ib_update_fast_reg_key(mr, ++key);
> -     fastreg_wr.wr.fast_reg.rkey = mr->rkey;
> +
> +     memset(&fastreg_wr, 0, sizeof(fastreg_wr));
> +     ib_set_fastreg_wr(mr, mr->rkey, (uintptr_t)mw, false, &fastreg_wr);
> 
>       DECR_CQCOUNT(&r_xprt->rx_ep);
>       rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
> @@ -385,15 +393,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg,
> 
>       seg1->rl_mw = mw;
>       seg1->mr_rkey = mr->rkey;
> -     seg1->mr_base = seg1->mr_dma + pageoff;
> +     seg1->mr_base = mr->iova;
>       seg1->mr_nsegs = i;
>       seg1->mr_len = len;
>       return i;
> 
> out_senderr:
>       dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
> -     while (i--)
> -             rpcrdma_unmap_one(device, --seg);
> +     ib_dma_unmap_sg(device, frmr->sg, frmr->sg_nents, direction);
>       __frwr_queue_recovery(mw);
>       return rc;
> }
> @@ -407,22 +414,23 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct 
> rpcrdma_mr_seg *seg)
>       struct rpcrdma_mr_seg *seg1 = seg;
>       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>       struct rpcrdma_mw *mw = seg1->rl_mw;
> +     struct rpcrdma_frmr *frmr = &mw->r.frmr;
>       struct ib_send_wr invalidate_wr, *bad_wr;
>       int rc, nsegs = seg->mr_nsegs;
> 
>       dprintk("RPC:       %s: FRMR %p\n", __func__, mw);
> 
>       seg1->rl_mw = NULL;
> -     mw->r.frmr.fr_state = FRMR_IS_INVALID;
> +     frmr->fr_state = FRMR_IS_INVALID;
> 
>       memset(&invalidate_wr, 0, sizeof(invalidate_wr));
>       invalidate_wr.wr_id = (unsigned long)(void *)mw;
>       invalidate_wr.opcode = IB_WR_LOCAL_INV;
> -     invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
> +     invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
>       DECR_CQCOUNT(&r_xprt->rx_ep);
> 
> -     while (seg1->mr_nsegs--)
> -             rpcrdma_unmap_one(ia->ri_device, seg++);
> +     ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);

->mr_dir was previously set by rpcrdma_map_one(), which you’ve replaced
with ib_map_mr_sg(). So maybe frwr_op_map() needs to save “direction”
in the rpcrdma_frmr.


> +
>       read_lock(&ia->ri_qplock);
>       rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
>       read_unlock(&ia->ri_qplock);
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index 886f8c8..a1c3ab2b 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -195,7 +195,9 @@ enum rpcrdma_frmr_state {
> };
> 
> struct rpcrdma_frmr {
> -     struct ib_fast_reg_page_list    *fr_pgl;
> +     struct scatterlist              *sg;
> +     unsigned int                    sg_nents;
> +     unsigned int                    dma_nents;
>       struct ib_mr                    *fr_mr;
>       enum rpcrdma_frmr_state         fr_state;
>       struct work_struct              fr_work;

--
Chuck Lever



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to