Hi Steve-

I reviewed and successfully tested this patch with mlx4 using FRMR and MTHCAFMR.
A couple of cosmetic notes are below:


On Apr 10, 2014, at 11:13 AM, Steve Wise <sw...@opengridcomputing.com> wrote:

> Some rdma devices don't support a fast register page list depth of
> at least RPCRDMA_MAX_DATA_SEGS.  So xprtrdma needs to chunk its fast
> register regions according to the minimum of the device max supported
> depth or RPCRDMA_MAX_DATA_SEGS.
> 
> Signed-off-by: Steve Wise <sw...@opengridcomputing.com>
> ---
> 
> net/sunrpc/xprtrdma/rpc_rdma.c  |    2 -
> net/sunrpc/xprtrdma/verbs.c     |   64 ++++++++++++++++++++++++++++-----------
> net/sunrpc/xprtrdma/xprt_rdma.h |    1 +
> 3 files changed, 47 insertions(+), 20 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index 96ead52..14d7634 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -249,8 +249,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct 
> xdr_buf *target,
>       req->rl_nchunks = nchunks;
> 
>       BUG_ON(nchunks == 0);

I don’t see a path through rpcrdma_create_chunks() where nchunks can be 0 at
this point. If you agree, could you also remove the BUG_ON above when you
re-submit a new version of this patch?

> -     BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
> -            && (nchunks > 3));
> 
>       /*
>        * finish off header. If write, marshal discrim and nchunks.
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 93726560..c08f193 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct 
> sockaddr *addr, int memreg)
>                               __func__);
>                       memreg = RPCRDMA_REGISTER;
> #endif
> +             } else {
> +                     /* Mind the ia limit on FRMR page list depth */
> +                     ia->ri_max_frmr_depth = min_t(unsigned int,
> +                             RPCRDMA_MAX_DATA_SEGS,
> +                             devattr.max_fast_reg_page_list_len);
>               }
>               break;
>       }
> @@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct 
> rpcrdma_ia *ia,
>       ep->rep_attr.srq = NULL;
>       ep->rep_attr.cap.max_send_wr = cdata->max_requests;
>       switch (ia->ri_memreg_strategy) {
> -     case RPCRDMA_FRMR:
> +     case RPCRDMA_FRMR: {
> +             int depth = 7;
> +
>               /* Add room for frmr register and invalidate WRs.
>                * 1. FRMR reg WR for head
>                * 2. FRMR invalidate WR for head
> -              * 3. FRMR reg WR for pagelist
> -              * 4. FRMR invalidate WR for pagelist
> +              * 3. N FRMR reg WRs for pagelist
> +              * 4. N FRMR invalidate WRs for pagelist
>                * 5. FRMR reg WR for tail
>                * 6. FRMR invalidate WR for tail
>                * 7. The RDMA_SEND WR
>                */
> -             ep->rep_attr.cap.max_send_wr *= 7;
> +
> +             /* Calculate N if the device max FRMR depth is smaller than
> +              * RPCRDMA_MAX_DATA_SEGS.
> +              */
> +             if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
> +                     int delta = RPCRDMA_MAX_DATA_SEGS -
> +                                 ia->ri_max_frmr_depth;
> +
> +                     do {
> +                             depth += 2; /* FRMR reg + invalidate */
> +                             delta -= ia->ri_max_frmr_depth;
> +                     } while (delta > 0);
> +
> +             }
> +             ep->rep_attr.cap.max_send_wr *= depth;
>               if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
> -                     cdata->max_requests = devattr.max_qp_wr / 7;
> +                     cdata->max_requests = devattr.max_qp_wr / depth;
>                       if (!cdata->max_requests)
>                               return -EINVAL;
> -                     ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
> +                     ep->rep_attr.cap.max_send_wr = cdata->max_requests *
> +                                                    depth;
>               }
>               break;
> +     }
>       case RPCRDMA_MEMWINDOWS_ASYNC:
>       case RPCRDMA_MEMWINDOWS:
>               /* Add room for mw_binds+unbinds - overkill! */
> @@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, 
> struct rpcrdma_ep *ep,
>       case RPCRDMA_FRMR:
>               for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
>                       r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
> -                                                      RPCRDMA_MAX_SEGS);
> +                                             ia->ri_max_frmr_depth);
>                       if (IS_ERR(r->r.frmr.fr_mr)) {
>                               rc = PTR_ERR(r->r.frmr.fr_mr);
>                               dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
>                                       " failed %i\n", __func__, rc);
>                               goto out;
>                       }
> -                     r->r.frmr.fr_pgl =
> -                             ib_alloc_fast_reg_page_list(ia->ri_id->device,
> -                                                         RPCRDMA_MAX_SEGS);
> +                     r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
> +                                             ia->ri_id->device,
> +                                             ia->ri_max_frmr_depth);
>                       if (IS_ERR(r->r.frmr.fr_pgl)) {
>                               rc = PTR_ERR(r->r.frmr.fr_pgl);
>                               dprintk("RPC:       %s: "
> @@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg 
> *seg,
>       seg1->mr_offset -= pageoff;     /* start of page */
>       seg1->mr_len += pageoff;
>       len = -pageoff;
> -     if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> -             *nsegs = RPCRDMA_MAX_DATA_SEGS;
> +     if (*nsegs > ia->ri_max_frmr_depth)
> +             *nsegs = ia->ri_max_frmr_depth;
>       for (page_no = i = 0; i < *nsegs;) {
>               rpcrdma_map_one(ia, seg, writing);
>               pa = seg->mr_dma;
> @@ -1796,6 +1819,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
> {
>       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>       int rc = 0;
> +     int tmp_nsegs = nsegs;

I don’t understand what the changes to rpcrdma_register_external() accomplish.
As a test, I reverted them, and the patch behaved the same. Am I missing
something?

> 
>       switch (ia->ri_memreg_strategy) {
> 
> @@ -1805,35 +1829,39 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
>               seg->mr_rkey = ia->ri_bind_mem->rkey;
>               seg->mr_base = seg->mr_dma;
>               seg->mr_nsegs = 1;
> -             nsegs = 1;
> +             tmp_nsegs = 1;
>               break;
> #endif
> 
>       /* Registration using frmr registration */
>       case RPCRDMA_FRMR:
> -             rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, 
> r_xprt);
> +             rc = rpcrdma_register_frmr_external(seg, &tmp_nsegs, writing,
> +                                                 ia, r_xprt);
>               break;
> 
>       /* Registration using fmr memory registration */
>       case RPCRDMA_MTHCAFMR:
> -             rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
> +             rc = rpcrdma_register_fmr_external(seg, &tmp_nsegs, writing,
> +                                                ia);
>               break;
> 
>       /* Registration using memory windows */
>       case RPCRDMA_MEMWINDOWS_ASYNC:
>       case RPCRDMA_MEMWINDOWS:
> -             rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, 
> r_xprt);
> +             rc = rpcrdma_register_memwin_external(seg, &tmp_nsegs, writing,
> +                                                   ia, r_xprt);
>               break;
> 
>       /* Default registration each time */
>       default:
> -             rc = rpcrdma_register_default_external(seg, &nsegs, writing, 
> ia);
> +             rc = rpcrdma_register_default_external(seg, &tmp_nsegs, writing,
> +                                                    ia);
>               break;
>       }
>       if (rc)
>               return -1;
> 
> -     return nsegs;
> +     return tmp_nsegs;
> }
> 
> int
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cc1445d..98340a3 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -66,6 +66,7 @@ struct rpcrdma_ia {
>       struct completion       ri_done;
>       int                     ri_async_rc;
>       enum rpcrdma_memreg     ri_memreg_strategy;
> +     unsigned int            ri_max_frmr_depth;
> };
> 
> /*
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to