Can you provide sample code to use these new features? --CQ
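
P.S. To make the question concrete: my reading of the patch below is that a kernel
consumer does not call anything new. The shrinking path is taken automatically when
the QP is created with all send WRs signalled (IB_SIGNAL_ALL_WR, i.e. selective
signalling off), the firmware is at least 2.2.232, the platform is 64-bit, and the
QP is not an SMI/GSI QP. Is the following untested sketch the intended usage? The
pd/cq arguments and the queue capacities are placeholders I made up.

#include <rdma/ib_verbs.h>

/*
 * My guess at minimal usage: no new API is involved; choosing
 * IB_SIGNAL_ALL_WR is what allows mlx4 to size each WQE individually.
 */
static struct ib_qp *create_shrinkable_sq_qp(struct ib_pd *pd,
                                             struct ib_cq *cq)
{
        struct ib_qp_init_attr init_attr = {
                .send_cq     = cq,
                .recv_cq     = cq,
                .sq_sig_type = IB_SIGNAL_ALL_WR, /* selective signalling off */
                .qp_type     = IB_QPT_RC,
                .cap = {
                        .max_send_wr  = 128,
                        .max_recv_wr  = 128,
                        .max_send_sge = 4,
                        .max_recv_sge = 4,
                },
        };

        /* ib_post_send()/ib_poll_cq() are then used exactly as before. */
        return ib_create_qp(pd, &init_attr);
}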
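
Also, to check my reading of the new sizing loop in set_kernel_sq_size(): a
maximum-sized WR now occupies DIV_ROUND_UP(s, 1 << wqe_shift) basic blocks, and
wqe_shift grows until the ring fits the device limits. Below is a small userspace
model of just that arithmetic; all the cap values are invented, not real ConnectX
limits.

#include <stdio.h>

static unsigned roundup_pow_of_two(unsigned x)
{
        unsigned r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

int main(void)
{
        unsigned s = 208;               /* max WR size incl. overhead, bytes */
        unsigned max_send_wr = 128;     /* requested by the consumer */
        unsigned max_sq_desc_sz = 1008; /* made-up device caps */
        unsigned max_wqes = 16384;
        unsigned wqe_shift = 6;         /* 64-byte basic blocks on the fast path */
        unsigned wqes_per_wr, spare, cnt;

        for (;;) {
                if ((1u << wqe_shift) > max_sq_desc_sz)
                        return 1;
                /* basic blocks consumed by one maximum-sized WR */
                wqes_per_wr = (s + (1u << wqe_shift) - 1) >> wqe_shift;
                /* 2 KB of prefetch headroom plus one full WR */
                spare = (2048 >> wqe_shift) + wqes_per_wr;
                cnt = roundup_pow_of_two(max_send_wr * wqes_per_wr + spare);
                if (cnt <= max_wqes)
                        break;
                if (wqes_per_wr <= 1)
                        return 1;
                ++wqe_shift;
        }

        printf("wqe_shift=%u wqes_per_wr=%u wqe_cnt=%u max_post=%u\n",
               wqe_shift, wqes_per_wr, cnt, (cnt - spare) / wqes_per_wr);
        return 0;
}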
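
And if I follow the new posting path correctly, ind now advances by each WR's size
in basic blocks, with a NOP WQE filling the tail of the ring whenever a
maximum-sized WR could otherwise wrap in the middle. Here is a toy model of that
index arithmetic only (the ring size and WR sizes are made up); does this match the
intent?

#include <stdio.h>

enum { WQE_CNT = 64, MAX_WQES_PER_WR = 4 }; /* both powers of two */

static unsigned pad_wraparound(unsigned ind)
{
        unsigned left = WQE_CNT - (ind & (WQE_CNT - 1));

        if (left < MAX_WQES_PER_WR) {
                printf("  NOP of %u block(s) at slot %u\n",
                       left, ind & (WQE_CNT - 1));
                ind += left;    /* the NOP WQE consumes the ring tail */
        }
        return ind;
}

int main(void)
{
        unsigned ind = 0;
        unsigned i;

        for (i = 0; i < 26; ++i) {
                unsigned blocks = 1 + i % MAX_WQES_PER_WR; /* fake WR sizes */

                printf("WR %2u: slot %2u, %u block(s)\n",
                       i, ind & (WQE_CNT - 1), blocks);
                ind += blocks;  /* DIV_ROUND_UP(size * 16, 1 << wqe_shift) */
                ind = pad_wraparound(ind);
        }
        return 0;
}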
> -----Original Message-----
> From: [EMAIL PROTECTED]
> [mailto:[EMAIL PROTECTED] On Behalf Of
> Jack Morgenstein
> Sent: Wednesday, October 10, 2007 10:44 AM
> To: [email protected]
> Cc: Roland Dreier
> Subject: [ofa-general] [PATCH v5] IB/mlx4: shrinking WQE
>
> commit c0aa89f0b295dd0c20b2ff2b1d2eca10cdc84f4b
> Author: Michael S. Tsirkin <[EMAIL PROTECTED]>
> Date:   Thu Aug 30 15:51:40 2007 +0300
>
>     IB/mlx4: shrinking WQE
>
>     ConnectX supports shrinking WQEs, such that a single WR can include
>     multiple units of wqe_shift. This way, WRs can differ in size, and
>     do not have to be a power of 2 in size, saving memory and speeding up
>     send WR posting. Unfortunately, if we do this, the wqe_index field in
>     the CQE can't be used to look up the WR ID anymore, so do this only
>     if selective signalling is off.
>
>     Further, on 32-bit platforms, we can't use vmap to make
>     the QP buffer virtually contiguous. Thus we have to use
>     constant-sized WRs to make sure a WR is always fully within
>     a single page-sized chunk.
>
>     Finally, we use a WR with the NOP opcode to avoid wrap-around in
>     the middle of a WR. We set the NoErrorCompletion bit to avoid getting
>     completions with error for NOP WRs. Since NEC is only supported
>     starting with firmware 2.2.232, we use constant-sized WRs
>     for older firmware. And, since MLX QPs only support SEND, we use
>     constant-sized WRs in this case.
>
>     Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>
>
> ---
>
> Changes since v4: fix calls to stamp_send_wqe, and stamping placement
> inside post_nop_wqe.
> Found by regression, fixed by Jack Morgenstein.
> Changes since v3: fix NOP formatting.
> Found by Eli Cohen.
> Changes since v2: fix memory leak in mlx4_buf_alloc.
> Found by internal code review.
> Changes since v1: add missing patch hunks.
>
> Index: infiniband/drivers/infiniband/hw/mlx4/cq.c
> ===================================================================
> --- infiniband.orig/drivers/infiniband/hw/mlx4/cq.c	2007-10-10 17:12:05.184757000 +0200
> +++ infiniband/drivers/infiniband/hw/mlx4/cq.c	2007-10-10 17:23:02.337140000 +0200
> @@ -331,6 +331,12 @@ static int mlx4_ib_poll_one(struct mlx4_
>  	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
>  		MLX4_CQE_OPCODE_ERROR;
>
> +	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
> +		     is_send)) {
> +		printk(KERN_WARNING "Completion for NOP opcode detected!\n");
> +		return -EINVAL;
> +	}
> +
>  	if (!*cur_qp ||
>  	    (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
>  		/*
> @@ -353,8 +359,10 @@ static int mlx4_ib_poll_one(struct mlx4_
>
>  	if (is_send) {
>  		wq = &(*cur_qp)->sq;
> -		wqe_ctr = be16_to_cpu(cqe->wqe_index);
> -		wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
> +		if (!(*cur_qp)->sq_signal_bits) {
> +			wqe_ctr = be16_to_cpu(cqe->wqe_index);
> +			wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
> +		}
>  		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
>  		++wq->tail;
>  	} else if ((*cur_qp)->ibqp.srq) {
> Index: infiniband/drivers/infiniband/hw/mlx4/mlx4_ib.h
> ===================================================================
> --- infiniband.orig/drivers/infiniband/hw/mlx4/mlx4_ib.h	2007-10-10 17:21:17.844882000 +0200
> +++ infiniband/drivers/infiniband/hw/mlx4/mlx4_ib.h	2007-10-10 17:23:02.341138000 +0200
> @@ -120,6 +120,8 @@ struct mlx4_ib_qp {
>
>  	u32			doorbell_qpn;
>  	__be32			sq_signal_bits;
> +	unsigned		sq_next_wqe;
> +	int			sq_max_wqes_per_wr;
>  	int			sq_spare_wqes;
>  	struct mlx4_ib_wq	sq;
>
> Index: infiniband/drivers/infiniband/hw/mlx4/qp.c
> ===================================================================
> --- infiniband.orig/drivers/infiniband/hw/mlx4/qp.c	2007-10-10 17:21:17.853882000 +0200
> +++ infiniband/drivers/infiniband/hw/mlx4/qp.c	2007-10-10 17:23:02.350137000 +0200
> @@ -30,6 +30,7 @@
>   * SOFTWARE.
>   */
>
> +#include <linux/log2.h>
>  #include <rdma/ib_cache.h>
>  #include <rdma/ib_pack.h>
>
> @@ -92,7 +93,7 @@ static int is_qp0(struct mlx4_ib_dev *de
>
>  static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
>  {
> -	if (qp->buf.nbufs == 1)
> +	if (BITS_PER_LONG == 64 || qp->buf.nbufs == 1)
>  		return qp->buf.u.direct.buf + offset;
>  	else
>  		return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
> @@ -111,16 +112,88 @@ static void *get_send_wqe(struct mlx4_ib
>
>  /*
>   * Stamp a SQ WQE so that it is invalid if prefetched by marking the
> - * first four bytes of every 64 byte chunk with 0xffffffff, except for
> - * the very first chunk of the WQE.
> + * first four bytes of every 64 byte chunk with
> + * 0x7FFFFFF | (invalid_ownership_value << 31).
> + *
> + * When the max WR size is less than or equal to the WQE size,
> + * as an optimization, we can stamp the WQE with 0xffffffff,
> + * and skip the very first chunk of the WQE.
>   */
> -static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
> +static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
>  {
> -	u32 *wqe = get_send_wqe(qp, n);
> +	u32 *wqe;
>  	int i;
> +	int s;
> +	int ind;
> +	void *buf;
> +	__be32 stamp;
> +
> +	s = roundup(size, 1 << qp->sq.wqe_shift);
> +	if (qp->sq_max_wqes_per_wr > 1) {
> +		for (i = 0; i < s; i += 64) {
> +			ind = (i >> qp->sq.wqe_shift) + n;
> +			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
> +						       cpu_to_be32(0xffffffff);
> +			buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
> +			wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
> +			*wqe = stamp;
> +		}
> +	} else {
> +		buf = get_send_wqe(qp, n);
> +		for (i = 64; i < s; i += 64) {
> +			wqe = buf + i;
> +			*wqe = 0xffffffff;
> +		}
> +	}
> +}
> +
> +static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
> +{
> +	struct mlx4_wqe_ctrl_seg *ctrl;
> +	struct mlx4_wqe_inline_seg *inl;
> +	void *wqe;
> +	int s;
> +
> +	ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
> +	s = sizeof(struct mlx4_wqe_ctrl_seg);
> +
> +	if (qp->ibqp.qp_type == IB_QPT_UD) {
> +		struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
> +		struct mlx4_av *av = (struct mlx4_av *)dgram->av;
> +		memset(dgram, 0, sizeof *dgram);
> +		av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
> +		s += sizeof(struct mlx4_wqe_datagram_seg);
> +	}
> +
> +	/* Pad the remainder of the WQE with an inline data segment. */
> +	if (size > s) {
> +		inl = wqe + s;
> +		inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
> +	}
> +	ctrl->srcrb_flags = 0;
> +	ctrl->fence_size = size / 16;
> +	/*
> +	 * Make sure descriptor is fully written before
> +	 * setting ownership bit (because HW can start
> +	 * executing as soon as we do).
> +	 */
> +	wmb();
>
> -	for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16)
> -		wqe[i] = 0xffffffff;
> +	ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
> +		(n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
> +
> +	stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
> +}
> +
> +/* Post NOP WQE to prevent wrap-around in the middle of WR */
> +static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
> +{
> +	unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
> +	if (unlikely(s < qp->sq_max_wqes_per_wr)) {
> +		post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
> +		ind += s;
> +	}
> +	return ind;
>  }
>
>  static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
> @@ -237,6 +310,8 @@ static int set_rq_size(struct mlx4_ib_de
>  static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
>  			      enum ib_qp_type type, struct mlx4_ib_qp *qp)
>  {
> +	int s;
> +
>  	/* Sanity check SQ size before proceeding */
>  	if (cap->max_send_wr  > dev->dev->caps.max_wqes  ||
>  	    cap->max_send_sge > dev->dev->caps.max_sq_sg ||
> @@ -252,20 +327,69 @@ static int set_kernel_sq_size(struct mlx
>  	    cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
>  		return -EINVAL;
>
> -	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
> -							sizeof (struct mlx4_wqe_data_seg),
> -							cap->max_inline_data +
> -							sizeof (struct mlx4_wqe_inline_seg)) +
> -						    send_wqe_overhead(type)));
> -	qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
> -		sizeof (struct mlx4_wqe_data_seg);
> +	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
> +		cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
> +		send_wqe_overhead(type);
>
>  	/*
> -	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
> -	 * allow HW to prefetch.
> +	 * Hermon supports shrinking WQEs, such that a single WR can include
> +	 * multiple units of wqe_shift. This way, WRs can differ in size, and
> +	 * do not have to be a power of 2 in size, saving memory and speeding up
> +	 * send WR posting. Unfortunately, if we do this, the wqe_index field
> +	 * in the CQE can't be used to look up the WR ID anymore, so do this
> +	 * only if selective signalling is off.
> +	 *
> +	 * Further, on 32-bit platforms, we can't use vmap to make
> +	 * the QP buffer virtually contiguous. Thus we have to use
> +	 * constant-sized WRs to make sure a WR is always fully within
> +	 * a single page-sized chunk.
> +	 *
> +	 * Finally, we use the NOP opcode to avoid wrap-around in the middle
> +	 * of a WR. We set the NEC bit to avoid getting completions with error
> +	 * for NOP WRs. Since NEC is only supported starting with firmware
> +	 * 2.2.232, we use constant-sized WRs for older firmware.
> +	 *
> +	 * And, since MLX QPs only support SEND, we use constant-sized WRs in
> +	 * this case.
> +	 *
> +	 * We look for the smallest value of wqe_shift such that the resulting
> +	 * number of WQEs does not exceed device capabilities.
> +	 *
> +	 * We set WQE size to at least 64 bytes, so that stamping invalidates
> +	 * each WQE.
>  	 */
> -	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
> -	qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes);
> +	if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
> +	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
> +	    type != IB_QPT_SMI && type != IB_QPT_GSI)
> +		qp->sq.wqe_shift = ilog2(64);
> +	else
> +		qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
> +
> +	for (;;) {
> +		if (1 << qp->sq.wqe_shift > dev->dev->caps.max_sq_desc_sz)
> +			return -EINVAL;
> +
> +		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1 << qp->sq.wqe_shift);
> +
> +		/*
> +		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
> +		 * allow HW to prefetch.
> +		 */
> +		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
> +		qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
> +						    qp->sq_max_wqes_per_wr +
> +						    qp->sq_spare_wqes);
> +
> +		if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
> +			break;
> +
> +		if (qp->sq_max_wqes_per_wr <= 1)
> +			return -EINVAL;
> +
> +		++qp->sq.wqe_shift;
> +	}
> +
> +	qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) -
> +			 send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg);
>
>  	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
>  		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
> @@ -277,7 +401,8 @@ static int set_kernel_sq_size(struct mlx
>  		qp->sq.offset = 0;
>  	}
>
> -	cap->max_send_wr  = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
> +	cap->max_send_wr  = qp->sq.max_post =
> +		(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
>  	cap->max_send_sge = qp->sq.max_gs;
>  	/* We don't support inline sends for kernel QPs (yet) */
>  	cap->max_inline_data = 0;
> @@ -315,6 +440,12 @@ static int create_qp_common(struct mlx4_
>  	qp->rq.tail = 0;
>  	qp->sq.head = 0;
>  	qp->sq.tail = 0;
> +	qp->sq_next_wqe = 0;
> +
> +	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
> +		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
> +	else
> +		qp->sq_signal_bits = 0;
>
>  	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
>  	if (err)
> @@ -405,11 +536,6 @@ static int create_qp_common(struct mlx4_
>  	 */
>  	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
>
> -	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
> -		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
> -	else
> -		qp->sq_signal_bits = 0;
> -
>  	qp->mqp.event = mlx4_ib_qp_event;
>
>  	return 0;
> @@ -904,7 +1030,7 @@ static int __mlx4_ib_modify_qp(struct ib
>  			ctrl = get_send_wqe(qp, i);
>  			ctrl->owner_opcode = cpu_to_be32(1 << 31);
>
> -			stamp_send_wqe(qp, i);
> +			stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
>  		}
>  	}
>
> @@ -1266,13 +1392,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
>  	unsigned long flags;
>  	int nreq;
>  	int err = 0;
> -	int ind;
> -	int size;
> +	unsigned ind;
> +	int uninitialized_var(stamp);
> +	int uninitialized_var(size);
>  	int i;
>
>  	spin_lock_irqsave(&qp->rq.lock, flags);
>
> -	ind = qp->sq.head;
> +	ind = qp->sq_next_wqe;
>
>  	for (nreq = 0; wr; ++nreq, wr = wr->next) {
>  		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
> @@ -1288,7 +1415,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
>  		}
>
>  		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
> -		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
> +		qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
>
>  		ctrl->srcrb_flags =
>  			(wr->send_flags & IB_SEND_SIGNALED ?
> @@ -1401,16 +1528,23 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
>  		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
>  			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
>
> +		stamp = ind + qp->sq_spare_wqes;
> +		ind += DIV_ROUND_UP(size * 16, 1 << qp->sq.wqe_shift);
> +
>  		/*
>  		 * We can improve latency by not stamping the last
>  		 * send queue WQE until after ringing the doorbell, so
>  		 * only stamp here if there are still more WQEs to post.
> +		 *
> +		 * Same optimization applies to padding with NOP WQE
> +		 * in case of WQE shrinking (used to prevent wrap-around
> +		 * in the middle of WR).
>  		 */
> -		if (wr->next)
> -			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
> -				       (qp->sq.wqe_cnt - 1));
> +		if (wr->next) {
> +			stamp_send_wqe(qp, stamp, size * 16);
> +			ind = pad_wraparound(qp, ind);
> +		}
>
> -		++ind;
>  	}
>
>  out:
> @@ -1432,8 +1566,10 @@ out:
>  		 */
>  		mmiowb();
>
> -		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
> -			       (qp->sq.wqe_cnt - 1));
> +		stamp_send_wqe(qp, stamp, size * 16);
> +
> +		ind = pad_wraparound(qp, ind);
> +		qp->sq_next_wqe = ind;
>  	}
>
>  	spin_unlock_irqrestore(&qp->rq.lock, flags);
> Index: infiniband/drivers/net/mlx4/alloc.c
> ===================================================================
> --- infiniband.orig/drivers/net/mlx4/alloc.c	2007-10-10 17:12:12.259502000 +0200
> +++ infiniband/drivers/net/mlx4/alloc.c	2007-10-10 17:23:02.356137000 +0200
> @@ -151,6 +151,19 @@ int mlx4_buf_alloc(struct mlx4_dev *dev,
>
>  			memset(buf->u.page_list[i].buf, 0, PAGE_SIZE);
>  		}
> +
> +		if (BITS_PER_LONG == 64) {
> +			struct page **pages;
> +			pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
> +			if (!pages)
> +				goto err_free;
> +			for (i = 0; i < buf->nbufs; ++i)
> +				pages[i] = virt_to_page(buf->u.page_list[i].buf);
> +			buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
> +			kfree(pages);
> +			if (!buf->u.direct.buf)
> +				goto err_free;
> +		}
>  	}
>
>  	return 0;
> @@ -170,6 +183,9 @@ void mlx4_buf_free(struct mlx4_dev *dev,
>  		dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf,
>  				  buf->u.direct.map);
>  	else {
> +		if (BITS_PER_LONG == 64)
> +			vunmap(buf->u.direct.buf);
> +
>  		for (i = 0; i < buf->nbufs; ++i)
>  			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
>  					  buf->u.page_list[i].buf,
> Index: infiniband/include/linux/mlx4/device.h
> ===================================================================
> --- infiniband.orig/include/linux/mlx4/device.h	2007-10-10 17:21:17.954882000 +0200
> +++ infiniband/include/linux/mlx4/device.h	2007-10-10 17:23:02.363137000 +0200
> @@ -133,6 +133,11 @@ enum {
>  	MLX4_STAT_RATE_OFFSET	= 5
>  };
>
> +static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
> +{
> +	return (major << 32) | (minor << 16) | subminor;
> +}
> +
>  struct mlx4_caps {
>  	u64			fw_ver;
>  	int			num_ports;
> @@ -189,7 +194,7 @@ struct mlx4_buf_list {
>  };
>
>  struct mlx4_buf {
> -	union {
> +	struct {
>  		struct mlx4_buf_list	direct;
>  		struct mlx4_buf_list	*page_list;
>  	} u;
> Index: infiniband/include/linux/mlx4/qp.h
> ===================================================================
> --- infiniband.orig/include/linux/mlx4/qp.h	2007-10-10 17:12:38.460566000 +0200
> +++ infiniband/include/linux/mlx4/qp.h	2007-10-10 17:23:02.366140000 +0200
> @@ -154,7 +154,11 @@ struct mlx4_qp_context {
>  	u32			reserved5[10];
>  };
>
> +/* Which firmware version adds support for the NEC (NoErrorCompletion) bit */
> +#define MLX4_FW_VER_WQE_CTRL_NEC	mlx4_fw_ver(2, 2, 232)
> +
>  enum {
> +	MLX4_WQE_CTRL_NEC	= 1 << 29,
>  	MLX4_WQE_CTRL_FENCE	= 1 << 6,
>  	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
>  	MLX4_WQE_CTRL_SOLICITED	= 1 << 1,
