Bypass Verbs to improve RX performance.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Yaacov Hazan <yaacovh@mellanox.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx5/mlx5_ethdev.c |   4 +-
 drivers/net/mlx5/mlx5_fdir.c   |   2 +-
 drivers/net/mlx5/mlx5_rxq.c    | 303 ++++++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.c   | 289 ++++++++++++++++++++-------------------
 drivers/net/mlx5/mlx5_rxtx.h   |  38 +++---
 drivers/net/mlx5/mlx5_vlan.c   |   3 +-
 6 files changed, 325 insertions(+), 314 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 759434e..16b05d3 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1263,7 +1263,9 @@ mlx5_secondary_data_setup(struct priv *priv)
        }
        /* RX queues. */
        for (i = 0; i != nb_rx_queues; ++i) {
-               struct rxq *primary_rxq = (*sd->primary_priv->rxqs)[i];
+               struct rxq_ctrl *primary_rxq =
+                       container_of((*sd->primary_priv->rxqs)[i],
+                                    struct rxq_ctrl, rxq);

                if (primary_rxq == NULL)
                        continue;
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 1850218..73eb00e 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -431,7 +431,7 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx)
        ind_init_attr = (struct ibv_exp_rwq_ind_table_init_attr){
                .pd = priv->pd,
                .log_ind_tbl_size = 0,
-               .ind_tbl = &((*priv->rxqs)[idx]->wq),
+               .ind_tbl = &rxq_ctrl->wq,
                .comp_mask = 0,
        };

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 7db4ce7..a8f68a3 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -43,6 +43,8 @@
 #pragma GCC diagnostic ignored "-pedantic"
 #endif
 #include <infiniband/verbs.h>
+#include <infiniband/arch.h>
+#include <infiniband/mlx5_hw.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -373,8 +375,13 @@ priv_create_hash_rxqs(struct priv *priv)
                DEBUG("indirection table extended to assume %u WQs",
                      priv->reta_idx_n);
        }
-       for (i = 0; (i != priv->reta_idx_n); ++i)
-               wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
+       for (i = 0; (i != priv->reta_idx_n); ++i) {
+               struct rxq_ctrl *rxq_ctrl;
+
+               rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
+                                       struct rxq_ctrl, rxq);
+               wqs[i] = rxq_ctrl->wq;
+       }
        /* Get number of hash RX queues to configure. */
        for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
                hash_rxqs_n += ind_table_init[i].hash_types_n;
@@ -638,21 +645,13 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
               struct rte_mbuf **pool)
 {
        unsigned int i;
-       struct rxq_elt (*elts)[elts_n] =
-               rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
-                                 rxq_ctrl->socket);
        int ret = 0;

-       if (elts == NULL) {
-               ERROR("%p: can't allocate packets array", (void *)rxq_ctrl);
-               ret = ENOMEM;
-               goto error;
-       }
        /* For each WR (packet). */
        for (i = 0; (i != elts_n); ++i) {
-               struct rxq_elt *elt = &(*elts)[i];
-               struct ibv_sge *sge = &(*elts)[i].sge;
                struct rte_mbuf *buf;
+               volatile struct mlx5_wqe_data_seg *scat =
+                       &(*rxq_ctrl->rxq.wqes)[i];

                if (pool != NULL) {
                        buf = *(pool++);
@@ -666,40 +665,36 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
                        ret = ENOMEM;
                        goto error;
                }
-               elt->buf = buf;
                /* Headroom is reserved by rte_pktmbuf_alloc(). */
                assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
                /* Buffer is supposed to be empty. */
                assert(rte_pktmbuf_data_len(buf) == 0);
                assert(rte_pktmbuf_pkt_len(buf) == 0);
-               /* sge->addr must be able to store a pointer. */
-               assert(sizeof(sge->addr) >= sizeof(uintptr_t));
-               /* SGE keeps its headroom. */
-               sge->addr = (uintptr_t)
-                       ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
-               sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
-               sge->lkey = rxq_ctrl->mr->lkey;
-               /* Redundant check for tailroom. */
-               assert(sge->length == rte_pktmbuf_tailroom(buf));
+               assert(!buf->next);
+               PORT(buf) = rxq_ctrl->rxq.port_id;
+               DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
+               PKT_LEN(buf) = DATA_LEN(buf);
+               NB_SEGS(buf) = 1;
+               /* scat->addr must be able to store a pointer. */
+               assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+               *scat = (struct mlx5_wqe_data_seg){
+                       .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
+                       .byte_count = htonl(DATA_LEN(buf)),
+                       .lkey = htonl(rxq_ctrl->mr->lkey),
+               };
+               (*rxq_ctrl->rxq.elts)[i] = buf;
        }
        DEBUG("%p: allocated and configured %u single-segment WRs",
              (void *)rxq_ctrl, elts_n);
-       rxq_ctrl->rxq.elts_n = elts_n;
-       rxq_ctrl->rxq.elts_head = 0;
-       rxq_ctrl->rxq.elts = elts;
        assert(ret == 0);
        return 0;
 error:
-       if (elts != NULL) {
-               assert(pool == NULL);
-               for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-                       struct rxq_elt *elt = &(*elts)[i];
-                       struct rte_mbuf *buf = elt->buf;
-
-                       if (buf != NULL)
-                               rte_pktmbuf_free_seg(buf);
-               }
-               rte_free(elts);
+       assert(pool == NULL);
+       elts_n = i;
+       for (i = 0; (i != elts_n); ++i) {
+               if ((*rxq_ctrl->rxq.elts)[i] != NULL)
+                       rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
+               (*rxq_ctrl->rxq.elts)[i] = NULL;
        }
        DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
        assert(ret > 0);
@@ -716,22 +711,16 @@ static void
 rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
 {
        unsigned int i;
-       unsigned int elts_n = rxq_ctrl->rxq.elts_n;
-       struct rxq_elt (*elts)[elts_n] = rxq_ctrl->rxq.elts;

        DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
-       rxq_ctrl->rxq.elts_n = 0;
-       rxq_ctrl->rxq.elts = NULL;
-       if (elts == NULL)
+       if (rxq_ctrl->rxq.elts == NULL)
                return;
-       for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-               struct rxq_elt *elt = &(*elts)[i];
-               struct rte_mbuf *buf = elt->buf;

-               if (buf != NULL)
-                       rte_pktmbuf_free_seg(buf);
+       for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) {
+               if ((*rxq_ctrl->rxq.elts)[i] != NULL)
+                       rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
+               (*rxq_ctrl->rxq.elts)[i] = NULL;
        }
-       rte_free(elts);
 }

 /**
@@ -749,42 +738,40 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)

        DEBUG("cleaning up %p", (void *)rxq_ctrl);
        rxq_free_elts(rxq_ctrl);
-       rxq_ctrl->rxq.poll = NULL;
-       rxq_ctrl->rxq.recv = NULL;
        if (rxq_ctrl->if_wq != NULL) {
-               assert(rxq_ctrl->rxq.priv != NULL);
-               assert(rxq_ctrl->rxq.priv->ctx != NULL);
-               assert(rxq_ctrl->rxq.wq != NULL);
+               assert(rxq_ctrl->priv != NULL);
+               assert(rxq_ctrl->priv->ctx != NULL);
+               assert(rxq_ctrl->wq != NULL);
                params = (struct ibv_exp_release_intf_params){
                        .comp_mask = 0,
                };
-               claim_zero(ibv_exp_release_intf(rxq_ctrl->rxq.priv->ctx,
+               claim_zero(ibv_exp_release_intf(rxq_ctrl->priv->ctx,
                                                rxq_ctrl->if_wq,
                                                &params));
        }
        if (rxq_ctrl->if_cq != NULL) {
-               assert(rxq_ctrl->rxq.priv != NULL);
-               assert(rxq_ctrl->rxq.priv->ctx != NULL);
-               assert(rxq_ctrl->rxq.cq != NULL);
+               assert(rxq_ctrl->priv != NULL);
+               assert(rxq_ctrl->priv->ctx != NULL);
+               assert(rxq_ctrl->cq != NULL);
                params = (struct ibv_exp_release_intf_params){
                        .comp_mask = 0,
                };
-               claim_zero(ibv_exp_release_intf(rxq_ctrl->rxq.priv->ctx,
+               claim_zero(ibv_exp_release_intf(rxq_ctrl->priv->ctx,
                                                rxq_ctrl->if_cq,
                                                &params));
        }
-       if (rxq_ctrl->rxq.wq != NULL)
-               claim_zero(ibv_exp_destroy_wq(rxq_ctrl->rxq.wq));
-       if (rxq_ctrl->rxq.cq != NULL)
-               claim_zero(ibv_destroy_cq(rxq_ctrl->rxq.cq));
+       if (rxq_ctrl->wq != NULL)
+               claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
+       if (rxq_ctrl->cq != NULL)
+               claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
        if (rxq_ctrl->rd != NULL) {
                struct ibv_exp_destroy_res_domain_attr attr = {
                        .comp_mask = 0,
                };

-               assert(rxq_ctrl->rxq.priv != NULL);
-               assert(rxq_ctrl->rxq.priv->ctx != NULL);
-               claim_zero(ibv_exp_destroy_res_domain(rxq_ctrl->rxq.priv->ctx,
+               assert(rxq_ctrl->priv != NULL);
+               assert(rxq_ctrl->priv->ctx != NULL);
+               claim_zero(ibv_exp_destroy_res_domain(rxq_ctrl->priv->ctx,
                                                      rxq_ctrl->rd,
                                                      &attr));
        }
@@ -811,14 +798,13 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-       struct priv *priv = rxq_ctrl->rxq.priv;
+       struct priv *priv = rxq_ctrl->priv;
        struct rxq_ctrl tmpl = *rxq_ctrl;
        unsigned int mbuf_n;
        unsigned int desc_n;
        struct rte_mbuf **pool;
        unsigned int i, k;
        struct ibv_exp_wq_attr mod;
-       struct rxq_elt (*elts)[tmpl.rxq.elts_n];
        int err;

        DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
@@ -840,7 +826,7 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
                .attr_mask = IBV_EXP_WQ_ATTR_STATE,
                .wq_state = IBV_EXP_WQS_RESET,
        };
-       err = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+       err = ibv_exp_modify_wq(tmpl.wq, &mod);
        if (err) {
                ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
                assert(err > 0);
@@ -854,60 +840,33 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
        }
        /* Snatch mbufs from original queue. */
        k = 0;
-       elts = rxq_ctrl->rxq.elts;
-       for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-               struct rxq_elt *elt = &(*elts)[i];
-               struct rte_mbuf *buf = elt->buf;
-
-               pool[k++] = buf;
-       }
+       for (i = 0; (i != desc_n); ++i)
+               pool[k++] = (*rxq_ctrl->rxq.elts)[i];
        assert(k == mbuf_n);
-       tmpl.rxq.elts_n = 0;
-       tmpl.rxq.elts = NULL;
-       assert((void *)&tmpl.rxq.elts == NULL);
-       err = rxq_alloc_elts(&tmpl, desc_n, pool);
-       if (err) {
-               ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
-               rte_free(pool);
-               assert(err > 0);
-               return err;
-       }
-       assert(tmpl.rxq.elts_n == desc_n);
        rte_free(pool);
-       /* Clean up original data. */
-       rxq_ctrl->rxq.elts_n = 0;
-       rte_free(rxq_ctrl->rxq.elts);
-       rxq_ctrl->rxq.elts = NULL;
        /* Change queue state to ready. */
        mod = (struct ibv_exp_wq_attr){
                .attr_mask = IBV_EXP_WQ_ATTR_STATE,
                .wq_state = IBV_EXP_WQS_RDY,
        };
-       err = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+       err = ibv_exp_modify_wq(tmpl.wq, &mod);
        if (err) {
                ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
                      (void *)dev, strerror(err));
                goto error;
        }
        /* Post SGEs. */
-       assert(tmpl.if_wq != NULL);
-       elts = tmpl.rxq.elts;
-       for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-               err = tmpl.if_wq->recv_burst(
-                       tmpl.rxq.wq,
-                       &(*elts)[i].sge,
-                       1);
-               if (err)
-                       break;
-       }
+       err = rxq_alloc_elts(&tmpl, desc_n, pool);
        if (err) {
-               ERROR("%p: failed to post SGEs with error %d",
-                     (void *)dev, err);
-               /* Set err because it does not contain a valid errno value. */
-               err = EIO;
-               goto error;
+               ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
+               rte_free(pool);
+               assert(err > 0);
+               return err;
        }
-       tmpl.rxq.recv = tmpl.if_wq->recv_burst;
+       /* Update doorbell counter. */
+       rxq_ctrl->rxq.rq_ci = desc_n;
+       rte_wmb();
+       *rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
 error:
        *rxq_ctrl = tmpl;
        assert(err >= 0);
@@ -915,6 +874,45 @@ error:
 }

 /**
+ * Initialize RX queue data path fields from the Verbs CQ and WQ.
+ *
+ * @param tmpl
+ *   RX queue control template; its cq, wq and rxq.elts_n are read.
+ * @param rxq_ctrl
+ *   RX queue control; rxq.elts is set to point right past this structure.
+ *
+ * @return
+ *   0 on success, EINVAL if CQE size differs from RTE_CACHE_LINE_SIZE.
+ */
+static inline int
+rxq_setup(struct rxq_ctrl *tmpl, struct rxq_ctrl *rxq_ctrl)
+{
+       struct ibv_cq *ibcq = tmpl->cq; /* NOTE(review): used by to_mxxx()? */
+       struct mlx5_cq *cq = to_mxxx(cq, cq);
+       struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
+
+       if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
+               ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+                     "it should be set to %u", RTE_CACHE_LINE_SIZE);
+               return EINVAL;
+       }
+       tmpl->rxq.rq_db = rwq->rq.db;
+       tmpl->rxq.cq_ci = 0;
+       tmpl->rxq.rq_ci = 0;
+       tmpl->rxq.cq_db = cq->dbrec;
+       tmpl->rxq.wqes =
+               (volatile struct mlx5_wqe_data_seg (*)[])
+               (uintptr_t)rwq->rq.buff;
+       tmpl->rxq.cqes =
+               (volatile struct mlx5_cqe (*)[])
+               (uintptr_t)cq->active_buf->buf;
+       tmpl->rxq.elts =
+               (struct rte_mbuf *(*)[tmpl->rxq.elts_n])
+               ((uintptr_t)rxq_ctrl + sizeof(*rxq_ctrl));
+       return 0;
+}
+
+/**
  * Configure a RX queue.
  *
  * @param dev
@@ -934,15 +932,16 @@ error:
  *   0 on success, errno value on failure.
  */
 int
-rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
-         unsigned int socket, const struct rte_eth_rxconf *conf,
-         struct rte_mempool *mp)
+rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
+              uint16_t desc, unsigned int socket,
+              const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
 {
        struct priv *priv = dev->data->dev_private;
        struct rxq_ctrl tmpl = {
+               .priv = priv,
                .socket = socket,
                .rxq = {
-                       .priv = priv,
+                       .elts_n = desc,
                        .mp = mp,
                },
        };
@@ -952,17 +951,16 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
                struct ibv_exp_cq_init_attr cq;
                struct ibv_exp_res_domain_init_attr rd;
                struct ibv_exp_wq_init_attr wq;
+               struct ibv_exp_cq_attr cq_attr;
        } attr;
        enum ibv_exp_query_intf_status status;
        unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
-       struct rxq_elt (*elts)[desc];
        int ret = 0;
-       unsigned int i;
-       unsigned int cq_size = desc;

        (void)conf; /* Thresholds configuration (ignored). */
        if (desc == 0) {
-               ERROR("%p: invalid number of RX descriptors", (void *)dev);
+               ERROR("%p: invalid number of RX descriptors (must be a"
+                     " multiple of 2)", (void *)dev);
                return EINVAL;
        }
        /* Toggle RX checksum offload if hardware supports it. */
@@ -996,9 +994,9 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
                .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
                .res_domain = tmpl.rd,
        };
-       tmpl.rxq.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
-                                       &attr.cq);
-       if (tmpl.rxq.cq == NULL) {
+       tmpl.cq = ibv_exp_create_cq(priv->ctx, desc - 1, NULL, NULL, 0,
+                                   &attr.cq);
+       if (tmpl.cq == NULL) {
                ret = ENOMEM;
                ERROR("%p: CQ creation failure: %s",
                      (void *)dev, strerror(ret));
@@ -1015,13 +1013,13 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
                .wq_context = NULL, /* Could be useful in the future. */
                .wq_type = IBV_EXP_WQT_RQ,
                /* Max number of outstanding WRs. */
-               .max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
+               .max_recv_wr = ((priv->device_attr.max_qp_wr < (int)desc) ?
                                priv->device_attr.max_qp_wr :
-                               (int)cq_size),
+                               (int)desc),
                /* Max number of scatter/gather elements in a WR. */
                .max_recv_sge = 1,
                .pd = priv->pd,
-               .cq = tmpl.rxq.cq,
+               .cq = tmpl.cq,
                .comp_mask =
                        IBV_EXP_CREATE_WQ_RES_DOMAIN |
                        IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
@@ -1064,19 +1062,13 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
                     " up to date",
                     (void *)dev);

-       tmpl.rxq.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
-       if (tmpl.rxq.wq == NULL) {
+       tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
+       if (tmpl.wq == NULL) {
                ret = (errno ? errno : EINVAL);
                ERROR("%p: WQ creation failure: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
-       ret = rxq_alloc_elts(&tmpl, desc, NULL);
-       if (ret) {
-               ERROR("%p: RXQ allocation failed: %s",
-                     (void *)dev, strerror(ret));
-               goto error;
-       }
        /* Save port ID. */
        tmpl.rxq.port_id = dev->data->port_id;
        DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
@@ -1084,7 +1076,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
                .intf_scope = IBV_EXP_INTF_GLOBAL,
                .intf_version = 1,
                .intf = IBV_EXP_INTF_CQ,
-               .obj = tmpl.rxq.cq,
+               .obj = tmpl.cq,
        };
        tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
        if (tmpl.if_cq == NULL) {
@@ -1095,7 +1087,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
        attr.params = (struct ibv_exp_query_intf_params){
                .intf_scope = IBV_EXP_INTF_GLOBAL,
                .intf = IBV_EXP_INTF_WQ,
-               .obj = tmpl.rxq.wq,
+               .obj = tmpl.wq,
        };
        tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
        if (tmpl.if_wq == NULL) {
@@ -1108,38 +1100,34 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
                .attr_mask = IBV_EXP_WQ_ATTR_STATE,
                .wq_state = IBV_EXP_WQS_RDY,
        };
-       ret = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+       ret = ibv_exp_modify_wq(tmpl.wq, &mod);
        if (ret) {
                ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
-       /* Post SGEs. */
-       elts = tmpl.rxq.elts;
-       for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-               ret = tmpl.if_wq->recv_burst(
-                       tmpl.rxq.wq,
-                       &(*elts)[i].sge,
-                       1);
-               if (ret)
-                       break;
+       ret = rxq_setup(&tmpl, rxq_ctrl);
+       if (ret) {
+               ERROR("%p: cannot initialize RX queue structure: %s",
+                     (void *)dev, strerror(ret));
+               goto error;
        }
+       ret = rxq_alloc_elts(&tmpl, desc, NULL);
        if (ret) {
-               ERROR("%p: failed to post SGEs with error %d",
-                     (void *)dev, ret);
-               /* Set ret because it does not contain a valid errno value. */
-               ret = EIO;
+               ERROR("%p: RXQ allocation failed: %s",
+                     (void *)dev, strerror(ret));
                goto error;
        }
        /* Clean up rxq in case we're reinitializing it. */
        DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
        rxq_cleanup(rxq_ctrl);
        *rxq_ctrl = tmpl;
+       /* Update doorbell counter. */
+       rxq_ctrl->rxq.rq_ci = desc;
+       rte_wmb();
+       *rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
        DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
        assert(ret == 0);
-       /* Assign function in queue. */
-       rxq_ctrl->rxq.poll = rxq_ctrl->if_cq->poll_length_flags_cvlan;
-       rxq_ctrl->rxq.recv = rxq_ctrl->if_wq->recv_burst;
        return 0;
 error:
        rxq_cleanup(&tmpl);
@@ -1173,14 +1161,19 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 {
        struct priv *priv = dev->data->dev_private;
        struct rxq *rxq = (*priv->rxqs)[idx];
-       struct rxq_ctrl *rxq_ctrl;
+       struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
        int ret;

        if (mlx5_is_secondary())
                return -E_RTE_SECONDARY;

        priv_lock(priv);
-       rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+       if (!rte_is_power_of_2(desc)) {
+               desc = 1 << log2above(desc);
+               WARN("%p: increased number of descriptors in RX queue %u"
+                    " to the next power of two (%d)",
+                    (void *)dev, idx, desc);
+       }
        DEBUG("%p: configuring queue %u for %u descriptors",
              (void *)dev, idx, desc);
        if (idx >= priv->rxqs_n) {
@@ -1199,8 +1192,9 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                (*priv->rxqs)[idx] = NULL;
                rxq_cleanup(rxq_ctrl);
        } else {
-               rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl), 0,
-                                            socket);
+               rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
+                                            desc * sizeof(struct rte_mbuf *),
+                                            0, socket);
                if (rxq_ctrl == NULL) {
                        ERROR("%p: unable to allocate queue index %u",
                              (void *)dev, idx);
@@ -1208,7 +1202,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                        return -ENOMEM;
                }
        }
-       ret = rxq_setup(dev, rxq_ctrl, desc, socket, conf, mp);
+       ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
        if (ret)
                rte_free(rxq_ctrl);
        else {
@@ -1243,12 +1237,12 @@ mlx5_rx_queue_release(void *dpdk_rxq)
        if (rxq == NULL)
                return;
        rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
-       priv = rxq->priv;
+       priv = rxq_ctrl->priv;
        priv_lock(priv);
        for (i = 0; (i != priv->rxqs_n); ++i)
                if ((*priv->rxqs)[i] == rxq) {
                        DEBUG("%p: removing RX queue %p from list",
-                             (void *)priv->dev, (void *)rxq);
+                             (void *)priv->dev, (void *)rxq_ctrl);
                        (*priv->rxqs)[i] = NULL;
                        break;
                }
@@ -1278,7 +1272,8 @@ mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
                              uint16_t pkts_n)
 {
        struct rxq *rxq = dpdk_rxq;
-       struct priv *priv = mlx5_secondary_data_setup(rxq->priv);
+       struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+       struct priv *priv = mlx5_secondary_data_setup(rxq_ctrl->priv);
        struct priv *primary_priv;
        unsigned int index;

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 6a0d707..27d8852 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -42,6 +42,8 @@
 #pragma GCC diagnostic ignored "-pedantic"
 #endif
 #include <infiniband/verbs.h>
+#include <infiniband/mlx5_hw.h>
+#include <infiniband/arch.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -55,7 +57,7 @@
 #include <rte_prefetch.h>
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
-#include <rte_memory.h>
+#include <rte_ether.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -65,6 +67,47 @@
 #include "mlx5_rxtx.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
+#include "mlx5_prm.h"
+
+static inline volatile struct mlx5_cqe64 *
+get_cqe64(volatile struct mlx5_cqe cqes[],
+         unsigned int cqes_n, uint16_t *ci)
+         __attribute__((always_inline));
+
+static inline int
+rx_poll_len(struct rxq *rxq) __attribute__((always_inline));
+
+/* Fetch the CQE at *ci; return NULL when none is handed to the caller. */
+static volatile struct mlx5_cqe64 *
+get_cqe64(volatile struct mlx5_cqe cqes[],
+         unsigned int cqes_n, uint16_t *ci)
+{
+       volatile struct mlx5_cqe64 *cqe;
+       uint16_t idx = *ci;
+       uint8_t op_own;
+
+       /* cqes_n is used as a mask: assumes it is a power of two. */
+       cqe = &cqes[idx & (cqes_n - 1)].cqe64;
+       op_own = cqe->op_own;
+       /* Owner bit check; presumably rejects hardware-owned CQEs. */
+       if (unlikely((op_own & MLX5_CQE_OWNER_MASK) == !(idx & cqes_n))) {
+               return NULL;
+       } else if (unlikely(op_own & 0x80)) {
+               switch (op_own >> 4) {
+               case MLX5_CQE_REQ_ERR:
+                       return cqe; /* *ci is left untouched. */
+               case MLX5_CQE_RESP_ERR:
+                       ++(*ci); /* Consume the CQE without returning it. */
+                       return NULL;
+               case MLX5_CQE_INVALID: /* No CQE */
+               default:
+                       return NULL;
+               }
+       }
+       /* cqe points into cqes[] and was dereferenced: it cannot be NULL. */
+       *ci = idx + 1;
+       return cqe;
+}

 /**
  * Manage TX completions.
@@ -390,8 +433,8 @@ stop:
 /**
  * Translate RX completion flags to packet type.
  *
- * @param flags
- *   RX completion flags returned by poll_length_flags().
+ * @param[in] cqe
+ *   Pointer to CQE.
  *
  * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
  *
@@ -399,11 +442,13 @@ stop:
  *   Packet type for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_pkt_type(uint32_t flags)
+rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
 {
        uint32_t pkt_type;
+       uint8_t flags = cqe->l4_hdr_type_etc;
+       uint8_t info = cqe->rsvd0[0];

-       if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
+       if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
                pkt_type =
                        TRANSPOSE(flags,
                                  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
@@ -420,11 +465,11 @@ rxq_cq_to_pkt_type(uint32_t flags)
        else
                pkt_type =
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV4_PACKET,
-                                 RTE_PTYPE_L3_IPV4) |
+                                 MLX5_CQE_L3_HDR_TYPE_IPV6,
+                                 RTE_PTYPE_L3_IPV6) |
                        TRANSPOSE(flags,
-                                 IBV_EXP_CQ_RX_IPV6_PACKET,
-                                 RTE_PTYPE_L3_IPV6);
+                                 MLX5_CQE_L3_HDR_TYPE_IPV4,
+                                 RTE_PTYPE_L3_IPV4);
        return pkt_type;
 }

@@ -433,50 +478,69 @@ rxq_cq_to_pkt_type(uint32_t flags)
  *
  * @param[in] rxq
  *   Pointer to RX queue structure.
- * @param flags
- *   RX completion flags returned by poll_length_flags().
+ * @param[in] cqe
+ *   Pointer to CQE.
  *
  * @return
  *   Offload flags (ol_flags) for struct rte_mbuf.
  */
 static inline uint32_t
-rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
+rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe)
 {
        uint32_t ol_flags = 0;
+       uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
+       uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
+       uint8_t info = cqe->rsvd0[0];

-       if (rxq->csum) {
-               /* Set IP checksum flag only for IPv4/IPv6 packets. */
-               if (flags &
-                   (IBV_EXP_CQ_RX_IPV4_PACKET | IBV_EXP_CQ_RX_IPV6_PACKET))
-                       ol_flags |=
-                               TRANSPOSE(~flags,
-                                       IBV_EXP_CQ_RX_IP_CSUM_OK,
-                                       PKT_RX_IP_CKSUM_BAD);
-               /* Set L4 checksum flag only for TCP/UDP packets. */
-               if (flags &
-                   (IBV_EXP_CQ_RX_TCP_PACKET | IBV_EXP_CQ_RX_UDP_PACKET))
-                       ol_flags |=
-                               TRANSPOSE(~flags,
-                                       IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
-                                       PKT_RX_L4_CKSUM_BAD);
-       }
+       if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
+           (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
+               ol_flags |=
+                       (!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) *
+                        PKT_RX_IP_CKSUM_BAD);
+       if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
+           (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
+           (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
+           (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
+               ol_flags |=
+                       (!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) *
+                        PKT_RX_L4_CKSUM_BAD);
        /*
         * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
         * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
         * (its value is 0).
         */
-       if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+       if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
                ol_flags |=
-                       TRANSPOSE(~flags,
+                       TRANSPOSE(~cqe->l4_hdr_type_etc,
                                  IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
                                  PKT_RX_IP_CKSUM_BAD) |
-                       TRANSPOSE(~flags,
+                       TRANSPOSE(~cqe->l4_hdr_type_etc,
                                  IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
                                  PKT_RX_L4_CKSUM_BAD);
        return ol_flags;
 }

 /**
+ * Get size of the next packet from the completion queue.
+ *
+ * @param rxq
+ *   RX queue to fetch packet from; get_cqe64() may advance rxq->cq_ci.
+ *
+ * @return
+ *   Packet size in bytes, 0 when get_cqe64() returns no CQE.
+ */
+static inline int __attribute__((always_inline))
+rx_poll_len(struct rxq *rxq)
+{
+       volatile struct mlx5_cqe64 *cqe;
+
+       cqe = get_cqe64(*rxq->cqes, rxq->elts_n, &rxq->cq_ci);
+       if (cqe)
+               return ntohl(cqe->byte_cnt);
+       return 0; /* No CQE returned. */
+}
+
+/**
  * DPDK callback for RX.
  *
  * @param dpdk_rxq
@@ -492,133 +556,82 @@ rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
 uint16_t
 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-       struct rxq *rxq = (struct rxq *)dpdk_rxq;
-       struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
-       const unsigned int elts_n = rxq->elts_n;
-       unsigned int elts_head = rxq->elts_head;
-       struct ibv_sge sges[pkts_n];
-       unsigned int i;
+       struct rxq *rxq = dpdk_rxq;
        unsigned int pkts_ret = 0;
-       int ret;
+       unsigned int i;
+       unsigned int rq_ci = rxq->rq_ci;
+       const unsigned int elts_n = rxq->elts_n;
+       const unsigned int wqe_cnt = elts_n - 1;

        for (i = 0; (i != pkts_n); ++i) {
-               struct rxq_elt *elt = &(*elts)[elts_head];
-               unsigned int len;
-               struct rte_mbuf *seg = elt->buf;
+               unsigned int idx = rq_ci & wqe_cnt;
                struct rte_mbuf *rep;
-               uint32_t flags;
-               uint16_t vlan_tci;
-
-               /* Sanity checks. */
-               assert(seg != NULL);
-               assert(elts_head < rxq->elts_n);
-               assert(rxq->elts_head < rxq->elts_n);
-               /*
-                * Fetch initial bytes of packet descriptor into a
-                * cacheline while allocating rep.
-                */
-               rte_mbuf_prefetch_part1(seg);
-               rte_mbuf_prefetch_part2(seg);
-               ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
-               if (unlikely(ret < 0)) {
-                       struct ibv_wc wc;
-                       int wcs_n;
-
-                       DEBUG("rxq=%p, poll_length() failed (ret=%d)",
-                             (void *)rxq, ret);
-                       /* ibv_poll_cq() must be used in case of failure. */
-                       wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
-                       if (unlikely(wcs_n == 0))
-                               break;
-                       if (unlikely(wcs_n < 0)) {
-                               DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
-                                     (void *)rxq, wcs_n);
-                               break;
-                       }
-                       assert(wcs_n == 1);
-                       if (unlikely(wc.status != IBV_WC_SUCCESS)) {
-                               /* Whatever, just repost the offending WR. */
-                               DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
-                                     " completion status (%d): %s",
-                                     (void *)rxq, wc.wr_id, wc.status,
-                                     ibv_wc_status_str(wc.status));
-#ifdef MLX5_PMD_SOFT_COUNTERS
-                               /* Increment dropped packets counter. */
-                               ++rxq->stats.idropped;
-#endif
-                               /* Add SGE to array for repost. */
-                               sges[i] = elt->sge;
-                               goto repost;
-                       }
-                       ret = wc.byte_len;
-               }
-               if (ret == 0)
-                       break;
-               assert(ret >= (rxq->crc_present << 2));
-               len = ret - (rxq->crc_present << 2);
+               struct rte_mbuf *pkt;
+               unsigned int len;
+               volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
+               volatile struct mlx5_cqe64 *cqe =
+                       &(*rxq->cqes)[rxq->cq_ci & wqe_cnt].cqe64;
+
+               pkt = (*rxq->elts)[idx];
+               rte_prefetch0(cqe);
                rep = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(rep == NULL)) {
-                       /*
-                        * Unable to allocate a replacement mbuf,
-                        * repost WR.
-                        */
-                       DEBUG("rxq=%p: can't allocate a new mbuf",
-                             (void *)rxq);
-                       /* Increment out of memory counters. */
                        ++rxq->stats.rx_nombuf;
-                       ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
-                       goto repost;
+                       break;
                }
-
-               /* Reconfigure sge to use rep instead of seg. */
-               elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
-               elt->buf = rep;
-
-               /* Add SGE to array for repost. */
-               sges[i] = elt->sge;
-
-               /* Update seg information. */
-               SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);
-               NB_SEGS(seg) = 1;
-               PORT(seg) = rxq->port_id;
-               NEXT(seg) = NULL;
-               PKT_LEN(seg) = len;
-               DATA_LEN(seg) = len;
-               if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip) {
-                       seg->packet_type = rxq_cq_to_pkt_type(flags);
-                       seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
-                       if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
-                               seg->ol_flags |= PKT_RX_VLAN_PKT;
-                               seg->vlan_tci = vlan_tci;
+               SET_DATA_OFF(rep, RTE_PKTMBUF_HEADROOM);
+               NB_SEGS(rep) = 1;
+               PORT(rep) = rxq->port_id;
+               NEXT(rep) = NULL;
+               len = rx_poll_len(rxq);
+               if (unlikely(len == 0)) {
+                       rte_mbuf_refcnt_set(rep, 0);
+                       __rte_mbuf_raw_free(rep);
+                       break;
+               }
+               /* Fill NIC descriptor with the new buffer.  The lkey and size
+                * of the buffers are already known, only the buffer address
+                * changes. */
+               wqe->addr = htonll((uintptr_t)rep->buf_addr +
+                                  RTE_PKTMBUF_HEADROOM);
+               (*rxq->elts)[idx] = rep;
+               /* Update pkt information. */
+               if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
+                   rxq->crc_present) {
+                       if (rxq->csum) {
+                               pkt->packet_type = rxq_cq_to_pkt_type(cqe);
+                               pkt->ol_flags = rxq_cq_to_ol_flags(rxq, cqe);
+                       }
+                       if (cqe->l4_hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) {
+                               pkt->ol_flags |= PKT_RX_VLAN_PKT;
+                               pkt->vlan_tci = ntohs(cqe->vlan_info);
                        }
+                       if (rxq->crc_present)
+                               len -= ETHER_CRC_LEN;
                }
-               /* Return packet. */
-               *(pkts++) = seg;
-               ++pkts_ret;
+               PKT_LEN(pkt) = len;
+               DATA_LEN(pkt) = len;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Increment bytes counter. */
                rxq->stats.ibytes += len;
 #endif
-repost:
-               if (++elts_head >= elts_n)
-                       elts_head = 0;
-               continue;
+               /* Return packet. */
+               *(pkts++) = pkt;
+               ++pkts_ret;
+               ++rq_ci;
        }
-       if (unlikely(i == 0))
+       if (unlikely((i == 0) && (rq_ci == rxq->rq_ci)))
                return 0;
        /* Repost WRs. */
 #ifdef DEBUG_RECV
        DEBUG("%p: reposting %u WRs", (void *)rxq, i);
 #endif
-       ret = rxq->recv(rxq->wq, sges, i);
-       if (unlikely(ret)) {
-               /* Inability to repost WRs is fatal. */
-               DEBUG("%p: recv_burst(): failed (ret=%d)",
-                     (void *)rxq->priv,
-                     ret);
-               abort();
-       }
-       rxq->elts_head = elts_head;
+       /* Update the consumer index. */
+       rxq->rq_ci = rq_ci;
+       rte_wmb();
+       *rxq->cq_db = htonl(rxq->cq_ci);
+       rte_wmb();
+       *rxq->rq_db = htonl(rxq->rq_ci);
 #ifdef MLX5_PMD_SOFT_COUNTERS
        /* Increment packets counter. */
        rxq->stats.ipackets += pkts_ret;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 570345b..1827123 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -43,6 +43,7 @@
 #pragma GCC diagnostic ignored "-pedantic"
 #endif
 #include <infiniband/verbs.h>
+#include <infiniband/mlx5_hw.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-pedantic"
 #endif
@@ -61,6 +62,7 @@
 #include "mlx5.h"
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
+#include "mlx5_prm.h"

 struct mlx5_rxq_stats {
        unsigned int idx; /**< Mapping index. */
@@ -81,12 +83,6 @@ struct mlx5_txq_stats {
        uint64_t odropped; /**< Total of packets not sent when TX ring full. */
 };

-/* RX element. */
-struct rxq_elt {
-       struct ibv_sge sge; /* Scatter/Gather Element. */
-       struct rte_mbuf *buf; /* SGE buffer. */
-};
-
 /* Flow director queue structure. */
 struct fdir_queue {
        struct ibv_qp *qp; /* Associated RX QP. */
@@ -97,25 +93,28 @@ struct priv;

 /* RX queue descriptor. */
 struct rxq {
-       struct priv *priv; /* Back pointer to private data. */
-       struct rte_mempool *mp; /* Memory Pool for allocations. */
-       struct ibv_cq *cq; /* Completion Queue. */
-       struct ibv_exp_wq *wq; /* Work Queue. */
-       int32_t (*poll)(); /* Verbs poll function. */
-       int32_t (*recv)(); /* Verbs receive function. */
-       unsigned int port_id; /* Port ID for incoming packets. */
-       unsigned int elts_n; /* (*elts)[] length. */
-       unsigned int elts_head; /* Current index in (*elts)[]. */
        unsigned int csum:1; /* Enable checksum offloading. */
        unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
        unsigned int vlan_strip:1; /* Enable VLAN stripping. */
        unsigned int crc_present:1; /* CRC must be subtracted. */
-       struct rxq_elt (*elts)[]; /* RX elements. */
-       struct mlx5_rxq_stats stats; /* RX queue counters. */
+       uint16_t rq_ci;
+       uint16_t cq_ci;
+       uint16_t elts_n;
+       uint16_t port_id;
+       volatile struct mlx5_wqe_data_seg(*wqes)[];
+       volatile struct mlx5_cqe(*cqes)[];
+       volatile uint32_t *rq_db;
+       volatile uint32_t *cq_db;
+       struct rte_mbuf *(*elts)[];
+       struct rte_mempool *mp;
+       struct mlx5_rxq_stats stats;
 } __rte_cache_aligned;

 /* RX queue control descriptor. */
 struct rxq_ctrl {
+       struct priv *priv; /* Back pointer to private data. */
+       struct ibv_cq *cq; /* Completion Queue. */
+       struct ibv_exp_wq *wq; /* Work Queue. */
        struct ibv_exp_res_domain *rd; /* Resource Domain. */
        struct fdir_queue fdir_queue; /* Flow director queue. */
        struct ibv_mr *mr; /* Memory Region (for mp). */
@@ -284,8 +283,9 @@ int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
 int priv_rehash_flows(struct priv *);
 void rxq_cleanup(struct rxq_ctrl *);
 int rxq_rehash(struct rte_eth_dev *, struct rxq_ctrl *);
-int rxq_setup(struct rte_eth_dev *, struct rxq_ctrl *, uint16_t, unsigned int,
-             const struct rte_eth_rxconf *, struct rte_mempool *);
+int rxq_ctrl_setup(struct rte_eth_dev *, struct rxq_ctrl *, uint16_t,
+                  unsigned int, const struct rte_eth_rxconf *,
+                  struct rte_mempool *);
 int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
                        const struct rte_eth_rxconf *, struct rte_mempool *);
 void mlx5_rx_queue_release(void *);
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 3b9b771..4719e69 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -144,6 +144,7 @@ static void
 priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 {
        struct rxq *rxq = (*priv->rxqs)[idx];
+       struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
        struct ibv_exp_wq_attr mod;
        uint16_t vlan_offloads =
                (on ? IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : 0) |
@@ -157,7 +158,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
                .vlan_offloads = vlan_offloads,
        };

-       err = ibv_exp_modify_wq(rxq->wq, &mod);
+       err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
        if (err) {
                ERROR("%p: failed to modified stripping mode: %s",
                      (void *)priv, strerror(err));
-- 
2.1.4

Reply via email to