Author: slavash
Date: Wed Dec  5 13:32:46 2018
New Revision: 341549
URL: https://svnweb.freebsd.org/changeset/base/341549

Log:
  mlx4en: Add support for receiving all data using one or more MCLBYTES-sized
  mbufs, also when the MTU is greater than MCLBYTES.
  
  Submitted by:   hselasky@
  Approved by:    hselasky (mentor)
  MFC after:      1 week
  Sponsored by:   Mellanox Technologies

Modified:
  head/sys/dev/mlx4/mlx4_en/en.h
  head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
  head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c

Modified: head/sys/dev/mlx4/mlx4_en/en.h
==============================================================================
--- head/sys/dev/mlx4/mlx4_en/en.h      Wed Dec  5 13:32:15 2018        
(r341548)
+++ head/sys/dev/mlx4/mlx4_en/en.h      Wed Dec  5 13:32:46 2018        
(r341549)
@@ -75,6 +75,15 @@
 #define MAX_RX_RINGS           128
 #define MIN_RX_RINGS           4
 #define TXBB_SIZE              64
+
+#ifndef MLX4_EN_MAX_RX_SEGS
+#define        MLX4_EN_MAX_RX_SEGS 1   /* or 8 */
+#endif
+
+#ifndef MLX4_EN_MAX_RX_BYTES
+#define        MLX4_EN_MAX_RX_BYTES MCLBYTES
+#endif
+
 #define HEADROOM               (2048 / TXBB_SIZE + 1)
 #define INIT_OWNER_BIT         0xffffffff
 #define STAMP_STRIDE           64
@@ -297,10 +306,12 @@ struct mlx4_en_tx_ring {
 };
 
 struct mlx4_en_rx_desc {
-       /* actual number of entries depends on rx ring stride */
-       struct mlx4_wqe_data_seg data[0];
+       struct mlx4_wqe_data_seg data[MLX4_EN_MAX_RX_SEGS];
 };
 
+/* the size of the structure above must be power of two */
+CTASSERT(powerof2(sizeof(struct mlx4_en_rx_desc)));
+
 struct mlx4_en_rx_mbuf {
        bus_dmamap_t dma_map;
        struct mbuf *mbuf;
@@ -309,7 +320,7 @@ struct mlx4_en_rx_mbuf {
 struct mlx4_en_rx_spare {
        bus_dmamap_t dma_map;
        struct mbuf *mbuf;
-       u64 paddr_be;
+       bus_dma_segment_t segs[MLX4_EN_MAX_RX_SEGS];
 };
 
 struct mlx4_en_rx_ring {
@@ -319,7 +330,6 @@ struct mlx4_en_rx_ring {
        u32 size ;      /* number of Rx descs*/
        u32 actual_size;
        u32 size_mask;
-       u16 stride;
        u16 log_stride;
        u16 cqn;        /* index of port CQ associated with this ring */
        u32 prod;
@@ -327,6 +337,7 @@ struct mlx4_en_rx_ring {
        u32 buf_size;
        u8  fcs_del;
        u32 rx_mb_size;
+       u32 rx_mr_key_be;
        int qpn;
        u8 *buf;
        struct mlx4_en_rx_mbuf *mbuf;
@@ -559,7 +570,6 @@ struct mlx4_en_priv {
        int registered;
        int gone;
        int allocated;
-       int stride;
        unsigned char current_mac[ETH_ALEN + 2];
         u64 mac;
        int mac_index;
@@ -805,7 +815,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
                           u32 size, int node);
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
                             struct mlx4_en_rx_ring **pring,
-                            u32 size, u16 stride);
+                            u32 size);
 void mlx4_en_rx_que(void *context, int pending);
 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv);
 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,

Modified: head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
==============================================================================
--- head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c  Wed Dec  5 13:32:15 2018        
(r341548)
+++ head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c  Wed Dec  5 13:32:46 2018        
(r341549)
@@ -1683,7 +1683,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
        for (i = 0; i < priv->rx_ring_num; i++) {
                if (priv->rx_ring[i])
                        mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
-                               priv->prof->rx_ring_size, priv->stride);
+                               priv->prof->rx_ring_size);
                if (priv->rx_cq[i])
                        mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
        }
@@ -1734,8 +1734,7 @@ err:
        for (i = 0; i < priv->rx_ring_num; i++) {
                if (priv->rx_ring[i])
                        mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
-                                               prof->rx_ring_size,
-                                               priv->stride);
+                                               prof->rx_ring_size);
                if (priv->rx_cq[i])
                        mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
        }
@@ -2236,9 +2235,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int 
                 err = -EINVAL;
                 goto out;
         }
-
-       priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
-                                         DS_SIZE);
 
        mlx4_en_sysctl_conf(priv);
 

Modified: head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c
==============================================================================
--- head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c      Wed Dec  5 13:32:15 2018        
(r341548)
+++ head/sys/dev/mlx4/mlx4_en/mlx4_en_rx.c      Wed Dec  5 13:32:46 2018        
(r341549)
@@ -44,14 +44,13 @@
 
 #include "en.h"
 
-
+#if (MLX4_EN_MAX_RX_SEGS == 1)
 static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
                                 struct mlx4_en_rx_ring *ring,
                                 int index)
 {
-       struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
-           (ring->buf + (ring->stride * index));
-       int possible_frags;
+       struct mlx4_en_rx_desc *rx_desc =
+           ((struct mlx4_en_rx_desc *)ring->buf) + index;
        int i;
 
        /* Set size and memtype fields */
@@ -63,38 +62,75 @@ static void mlx4_en_init_rx_desc(struct mlx4_en_priv *
         * stride, remaining (unused) fragments must be padded with
         * null address/size and a special memory key:
         */
-       possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / 
DS_SIZE;
-       for (i = 1; i < possible_frags; i++) {
+       for (i = 1; i < MLX4_EN_MAX_RX_SEGS; i++) {
                rx_desc->data[i].byte_count = 0;
                rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
                rx_desc->data[i].addr = 0;
        }
 }
+#endif
 
+static inline struct mbuf *
+mlx4_en_alloc_mbuf(struct mlx4_en_rx_ring *ring)
+{
+       struct mbuf *mb;
+
+#if (MLX4_EN_MAX_RX_SEGS == 1)
+        mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+        if (likely(mb != NULL))
+               mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
+#else
+       mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MLX4_EN_MAX_RX_BYTES);
+       if (likely(mb != NULL)) {
+               struct mbuf *mb_head = mb;
+               int i;
+
+               mb->m_len = MLX4_EN_MAX_RX_BYTES;
+               mb->m_pkthdr.len = MLX4_EN_MAX_RX_BYTES;
+
+               for (i = 1; i != MLX4_EN_MAX_RX_SEGS; i++) {
+                       if (mb_head->m_pkthdr.len >= ring->rx_mb_size)
+                               break;
+                       mb = (mb->m_next = m_getjcl(M_NOWAIT, MT_DATA, 0, 
MLX4_EN_MAX_RX_BYTES));
+                       if (unlikely(mb == NULL)) {
+                               m_freem(mb_head);
+                               return (NULL);
+                       }
+                       mb->m_len = MLX4_EN_MAX_RX_BYTES;
+                       mb_head->m_pkthdr.len += MLX4_EN_MAX_RX_BYTES;
+               }
+               /* rewind to first mbuf in chain */
+               mb = mb_head;
+       }
+#endif
+       return (mb);
+}
+
 static int
-mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
-     __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list)
+mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc 
*rx_desc,
+    struct mlx4_en_rx_mbuf *mb_list)
 {
-       bus_dma_segment_t segs[1];
+       bus_dma_segment_t segs[MLX4_EN_MAX_RX_SEGS];
        bus_dmamap_t map;
        struct mbuf *mb;
        int nsegs;
        int err;
+#if (MLX4_EN_MAX_RX_SEGS != 1)
+       int i;
+#endif
 
        /* try to allocate a new spare mbuf */
        if (unlikely(ring->spare.mbuf == NULL)) {
-               mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+               mb = mlx4_en_alloc_mbuf(ring);
                if (unlikely(mb == NULL))
                        return (-ENOMEM);
-               /* setup correct length */
-               mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
 
                /* make sure IP header gets aligned */
                m_adj(mb, MLX4_NET_IP_ALIGN);
 
                /* load spare mbuf into BUSDMA */
                err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, 
ring->spare.dma_map,
-                   mb, segs, &nsegs, BUS_DMA_NOWAIT);
+                   mb, ring->spare.segs, &nsegs, BUS_DMA_NOWAIT);
                if (unlikely(err != 0)) {
                        m_freem(mb);
                        return (err);
@@ -102,8 +138,14 @@ mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
 
                /* store spare info */
                ring->spare.mbuf = mb;
-               ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr);
 
+#if (MLX4_EN_MAX_RX_SEGS != 1)
+               /* zero remaining segs */
+               for (i = nsegs; i != MLX4_EN_MAX_RX_SEGS; i++) {
+                       ring->spare.segs[i].ds_addr = 0;
+                       ring->spare.segs[i].ds_len = 0;
+               }
+#endif
                bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
                    BUS_DMASYNC_PREREAD);
        }
@@ -115,13 +157,10 @@ mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
                bus_dmamap_unload(ring->dma_tag, mb_list->dma_map);
        }
 
-       mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+       mb = mlx4_en_alloc_mbuf(ring);
        if (unlikely(mb == NULL))
                goto use_spare;
 
-       /* setup correct length */
-       mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size;
-
        /* make sure IP header gets aligned */
        m_adj(mb, MLX4_NET_IP_ALIGN);
 
@@ -132,7 +171,20 @@ mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
                goto use_spare;
        }
 
-       *pdma = cpu_to_be64(segs[0].ds_addr);
+#if (MLX4_EN_MAX_RX_SEGS == 1)
+       rx_desc->data[0].addr = cpu_to_be64(segs[0].ds_addr);
+#else
+       for (i = 0; i != nsegs; i++) {
+               rx_desc->data[i].byte_count = cpu_to_be32(segs[i].ds_len);
+               rx_desc->data[i].lkey = ring->rx_mr_key_be;
+               rx_desc->data[i].addr = cpu_to_be64(segs[i].ds_addr);
+       }
+       for (; i != MLX4_EN_MAX_RX_SEGS; i++) {
+               rx_desc->data[i].byte_count = 0;
+               rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
+               rx_desc->data[i].addr = 0;
+       }
+#endif
        mb_list->mbuf = mb;
 
        bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD);
@@ -149,7 +201,21 @@ use_spare:
        ring->spare.mbuf = NULL;
 
        /* store physical address */
-       *pdma = ring->spare.paddr_be;
+#if (MLX4_EN_MAX_RX_SEGS == 1)
+       rx_desc->data[0].addr = cpu_to_be64(ring->spare.segs[0].ds_addr);
+#else
+       for (i = 0; i != MLX4_EN_MAX_RX_SEGS; i++) {
+               if (ring->spare.segs[i].ds_len != 0) {
+                       rx_desc->data[i].byte_count = 
cpu_to_be32(ring->spare.segs[i].ds_len);
+                       rx_desc->data[i].lkey = ring->rx_mr_key_be;
+                       rx_desc->data[i].addr = 
cpu_to_be64(ring->spare.segs[i].ds_addr);
+               } else {
+                       rx_desc->data[i].byte_count = 0;
+                       rx_desc->data[i].lkey = 
cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
+                       rx_desc->data[i].addr = 0;
+               }
+       }
+#endif
        return (0);
 }
 
@@ -167,13 +233,13 @@ static int
 mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
     struct mlx4_en_rx_ring *ring, int index)
 {
-       struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
-           (ring->buf + (index * ring->stride));
+       struct mlx4_en_rx_desc *rx_desc =
+           ((struct mlx4_en_rx_desc *)ring->buf) + index;
        struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index;
 
        mb_list->mbuf = NULL;
 
-       if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) {
+       if (mlx4_en_alloc_buf(ring, rx_desc, mb_list)) {
                priv->port_stats.rx_alloc_failed++;
                return (-ENOMEM);
        }
@@ -321,7 +387,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MJUM16BYTES,                /* maxsize */
-           1,                          /* nsegments */
+           MLX4_EN_MAX_RX_SEGS,        /* nsegments */
            MJUM16BYTES,                /* maxsegsize */
            0,                          /* flags */
            NULL, NULL,                 /* lockfunc, lockfuncarg */
@@ -334,11 +400,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
        ring->cons = 0;
        ring->size = size;
        ring->size_mask = size - 1;
-       ring->stride = roundup_pow_of_two(
-           sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
-       ring->log_stride = ffs(ring->stride) - 1;
-       ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
 
+       ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
+       ring->buf_size = (ring->size * sizeof(struct mlx4_en_rx_desc)) + 
TXBB_SIZE;
+
        tmp = size * sizeof(struct mlx4_en_rx_mbuf);
 
         ring->mbuf = kzalloc(tmp, GFP_KERNEL);
@@ -398,11 +463,11 @@ err_ring:
 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
 {
        struct mlx4_en_rx_ring *ring;
+#if (MLX4_EN_MAX_RX_SEGS == 1)
        int i;
+#endif
        int ring_ind;
        int err;
-       int stride = roundup_pow_of_two(
-           sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
 
        for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
                ring = priv->rx_ring[ring_ind];
@@ -413,8 +478,7 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *pri
                ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
                 ring->rx_mb_size = priv->rx_mb_size;
 
-               ring->stride = stride;
-               if (ring->stride <= TXBB_SIZE) {
+               if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE) {
                        /* Stamp first unused send wqe */
                        __be32 *ptr = (__be32 *)ring->buf;
                        __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
@@ -423,15 +487,18 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *pri
                        ring->buf += TXBB_SIZE;
                }
 
-               ring->log_stride = ffs(ring->stride) - 1;
-               ring->buf_size = ring->size * ring->stride;
+               ring->log_stride = ilog2(sizeof(struct mlx4_en_rx_desc));
+               ring->buf_size = ring->size * sizeof(struct mlx4_en_rx_desc);
 
                memset(ring->buf, 0, ring->buf_size);
                mlx4_en_update_rx_prod_db(ring);
 
+#if (MLX4_EN_MAX_RX_SEGS == 1)
                /* Initialize all descriptors */
                for (i = 0; i < ring->size; i++)
                        mlx4_en_init_rx_desc(priv, ring, i);
+#endif
+               ring->rx_mr_key_be = cpu_to_be32(priv->mdev->mr.key);
 
 #ifdef INET
                /* Configure lro mngr */
@@ -466,7 +533,7 @@ err_buffers:
 
        while (ring_ind >= 0) {
                ring = priv->rx_ring[ring_ind];
-               if (ring->stride <= TXBB_SIZE)
+               if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
                        ring->buf -= TXBB_SIZE;
                ring_ind--;
        }
@@ -477,14 +544,14 @@ err_buffers:
 
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
                             struct mlx4_en_rx_ring **pring,
-                            u32 size, u16 stride)
+                            u32 size)
 {
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_rx_ring *ring = *pring;
        uint32_t x;
 
        mlx4_en_unmap_buffer(&ring->wqres.buf);
-       mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
+       mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * sizeof(struct 
mlx4_en_rx_desc) + TXBB_SIZE);
        for (x = 0; x != size; x++)
                bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map);
        /* free spare mbuf, if any */
@@ -511,7 +578,7 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *p
        tcp_lro_free(&ring->lro);
 #endif
        mlx4_en_free_rx_buf(priv, ring);
-       if (ring->stride <= TXBB_SIZE)
+       if (sizeof(struct mlx4_en_rx_desc) <= TXBB_SIZE)
                ring->buf -= TXBB_SIZE;
 }
 
@@ -557,21 +624,43 @@ mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_e
     struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list,
     int length)
 {
+#if (MLX4_EN_MAX_RX_SEGS != 1)
+       struct mbuf *mb_head;
+#endif
        struct mbuf *mb;
 
        /* get mbuf */
        mb = mb_list->mbuf;
 
        /* collect used fragment while atomically replacing it */
-       if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list))
+       if (mlx4_en_alloc_buf(ring, rx_desc, mb_list))
                return (NULL);
 
        /* range check hardware computed value */
-       if (unlikely(length > mb->m_len))
-               length = mb->m_len;
+       if (unlikely(length > mb->m_pkthdr.len))
+               length = mb->m_pkthdr.len;
 
+#if (MLX4_EN_MAX_RX_SEGS == 1)
        /* update total packet length in packet header */
        mb->m_len = mb->m_pkthdr.len = length;
+#else
+       mb->m_pkthdr.len = length;
+       for (mb_head = mb; mb != NULL; mb = mb->m_next) {
+               if (mb->m_len > length)
+                       mb->m_len = length;
+               length -= mb->m_len;
+               if (likely(length == 0)) {
+                       if (likely(mb->m_next != NULL)) {
+                               /* trim off empty mbufs */
+                               m_freem(mb->m_next);
+                               mb->m_next = NULL;
+                       }
+                       break;
+               }
+       }
+       /* rewind to first mbuf in chain */
+       mb = mb_head;
+#endif
        return (mb);
 }
 
@@ -660,8 +749,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, stru
        while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
                    cons_index & size)) {
                mb_list = ring->mbuf + index;
-               rx_desc = (struct mlx4_en_rx_desc *)
-                   (ring->buf + (index << ring->log_stride));
+               rx_desc = ((struct mlx4_en_rx_desc *)ring->buf) + index;
 
                /*
                 * make sure we read the CQE after we read the ownership bit
@@ -830,7 +918,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *
        qp->event = mlx4_en_sqp_event;
 
        memset(context, 0, sizeof *context);
-       mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
+       mlx4_en_fill_qp_context(priv, ring->actual_size, sizeof(struct 
mlx4_en_rx_desc), 0, 0,
                                qpn, ring->cqn, -1, context);
        context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to