Add a separate routine to handle the no-fast-free offload
in the vector Tx path for multi-segmented packets.

Signed-off-by: Ashwin Sekhar T K <asek...@marvell.com>
---
 drivers/net/cnxk/cn10k_tx.h | 124 +++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 65 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 815cd2ff1f..a4c578354c 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -956,6 +956,14 @@ cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
        rte_io_wmb();
 #endif
        m->next = NULL;
+
+       /* Quickly handle single segmented packets. With this if-condition
+        * compiler will completely optimize out the below do-while loop
+        * from the Tx handler when NIX_TX_MULTI_SEG_F offload is not set.
+        */
+       if (!(flags & NIX_TX_MULTI_SEG_F))
+               goto done;
+
        m = m_next;
        if (!m)
                goto done;
@@ -1360,6 +1368,30 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
        }
 }
 
+static __rte_always_inline uint16_t
+cn10k_nix_prepare_mseg_vec_noff(struct rte_mbuf *m, uint64_t *cmd,
+                               uint64x2_t *cmd0, uint64x2_t *cmd1,
+                               uint64x2_t *cmd2, uint64x2_t *cmd3,
+                               const uint32_t flags)
+{
+       uint16_t segdw;
+
+       vst1q_u64(cmd, *cmd0); /* Send hdr */
+       if (flags & NIX_TX_NEED_EXT_HDR) {
+               vst1q_u64(cmd + 2, *cmd2); /* ext hdr */
+               vst1q_u64(cmd + 4, *cmd1); /* sg */
+       } else {
+               vst1q_u64(cmd + 2, *cmd1); /* sg */
+       }
+
+       segdw = cn10k_nix_prepare_mseg(m, cmd, flags);
+
+       if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
+               vst1q_u64(cmd + segdw * 2 - 2, *cmd3);
+
+       return segdw;
+}
+
 static __rte_always_inline void
 cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                                union nix_send_hdr_w0_u *sh,
@@ -1389,17 +1421,6 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
 
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
-
-       /* Set invert df if buffer is not to be freed by H/W */
-       if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
-               sg_u |= (cnxk_nix_prefree_seg(m) << 55);
-               /* Mark mempool object as "put" since it is freed by NIX */
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-       if (!(sg_u & (1ULL << 55)))
-               RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
-       rte_io_wmb();
-#endif
-
        m->next = NULL;
        m = m_next;
        /* Fill mbuf segments */
@@ -1409,16 +1430,6 @@ cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
                len -= dlen;
                sg_u = sg_u | ((uint64_t)dlen << (i << 4));
                *slist = rte_mbuf_data_iova(m);
-               /* Set invert df if buffer is not to be freed by H/W */
-               if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
-                       sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
-                       /* Mark mempool object as "put" since it is freed by NIX
-                        */
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-               if (!(sg_u & (1ULL << (i + 55))))
-                       RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
-               rte_io_wmb();
-#endif
                slist++;
                i++;
                nb_segs--;
@@ -1456,21 +1467,8 @@ cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
        union nix_send_hdr_w0_u sh;
        union nix_send_sg_s sg;
 
-       if (m->nb_segs == 1) {
-               if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
-                       sg.u = vgetq_lane_u64(cmd1[0], 0);
-                       sg.u |= (cnxk_nix_prefree_seg(m) << 55);
-                       cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
-               }
-
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-               sg.u = vgetq_lane_u64(cmd1[0], 0);
-               if (!(sg.u & (1ULL << 55)))
-                       RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
-               rte_io_wmb();
-#endif
+       if (m->nb_segs == 1)
                return;
-       }
 
        sh.u = vgetq_lane_u64(cmd0[0], 0);
        sg.u = vgetq_lane_u64(cmd1[0], 0);
@@ -1491,16 +1489,32 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
                               uint64_t *lmt_addr, __uint128_t *data128,
                               uint8_t *shift, const uint16_t flags)
 {
-       uint8_t j, off, lmt_used;
+       uint8_t j, off, lmt_used = 0;
+
+       if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+               off = 0;
+               for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
+                       if (off + segdw[j] > 8) {
+                               *data128 |= ((__uint128_t)off - 1) << *shift;
+                               *shift += 3;
+                               lmt_used++;
+                               lmt_addr += 16;
+                               off = 0;
+                       }
+                       off += cn10k_nix_prepare_mseg_vec_noff(mbufs[j],
+                                       lmt_addr + off * 2, &cmd0[j], &cmd1[j],
+                                       &cmd2[j], &cmd3[j], flags);
+               }
+               *data128 |= ((__uint128_t)off - 1) << *shift;
+               *shift += 3;
+               lmt_used++;
+               return lmt_used;
+       }
 
        if (!(flags & NIX_TX_NEED_EXT_HDR) &&
            !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
                /* No segments in 4 consecutive packets. */
                if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
-                       for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
-                               cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
-                                                          &cmd0[j], &cmd1[j],
-                                                          segdw[j], flags);
                        vst1q_u64(lmt_addr, cmd0[0]);
                        vst1q_u64(lmt_addr + 2, cmd1[0]);
                        vst1q_u64(lmt_addr + 4, cmd0[1]);
@@ -1517,18 +1531,10 @@ cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
                }
        }
 
-       lmt_used = 0;
        for (j = 0; j < NIX_DESCS_PER_LOOP;) {
                /* Fit consecutive packets in same LMTLINE. */
                if ((segdw[j] + segdw[j + 1]) <= 8) {
                        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
-                               cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
-                                                          &cmd0[j], &cmd1[j],
-                                                          segdw[j], flags);
-                               cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
-                                                          &cmd0[j + 1],
-                                                          &cmd1[j + 1],
-                                                          segdw[j + 1], flags);
                                /* TSTAMP takes 4 each, no segs. */
                                vst1q_u64(lmt_addr, cmd0[j]);
                                vst1q_u64(lmt_addr + 2, cmd2[j]);
@@ -1643,23 +1649,11 @@ cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
 {
        uint8_t off;
 
-       /* Handle no fast free when security is enabled without mseg */
-       if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
-           (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
-           !(flags & NIX_TX_MULTI_SEG_F)) {
-               union nix_send_sg_s sg;
-
-               sg.u = vgetq_lane_u64(cmd1, 0);
-               sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
-               cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
-
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-               sg.u = vgetq_lane_u64(cmd1, 0);
-               if (!(sg.u & (1ULL << 55)))
-                       RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
-                                               0);
-               rte_io_wmb();
-#endif
+       if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+               cn10k_nix_prepare_mseg_vec_noff(mbuf, LMT_OFF(laddr, 0, 0),
+                                               &cmd0, &cmd1, &cmd2, &cmd3,
+                                               flags);
+               return;
        }
        if (flags & NIX_TX_MULTI_SEG_F) {
                if ((flags & NIX_TX_NEED_EXT_HDR) &&
-- 
2.25.1

Reply via email to