This patch enables i40e TSO feature for both non-tunneling packet and tunneling 
packet.

Signed-off-by: Jijiang Liu <jijiang.liu at intel.com>
Signed-off-by: Miroslaw Walukiewicz <miroslaw.walukiewicz at intel.com>
---
 lib/librte_pmd_i40e/i40e_rxtx.c |   99 ++++++++++++++++++++++++++++-----------
 lib/librte_pmd_i40e/i40e_rxtx.h |   13 +++++
 2 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 349c1e5..9b9bdcd 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -465,16 +465,13 @@ static inline void
 i40e_txd_enable_checksum(uint64_t ol_flags,
                        uint32_t *td_cmd,
                        uint32_t *td_offset,
-                       uint8_t l2_len,
-                       uint16_t l3_len,
-                       uint8_t outer_l2_len,
-                       uint16_t outer_l3_len,
+                       union i40e_tx_offload tx_offload,
                        uint32_t *cd_tunneling)
 {
        /* UDP tunneling packet TX checksum offload */
        if (unlikely(ol_flags & PKT_TX_OUTER_IP_CKSUM)) {

-               *td_offset |= (outer_l2_len >> 1)
+               *td_offset |= (tx_offload.outer_l2_len >> 1)
                                << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

                if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
@@ -485,25 +482,35 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
                        *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;

                /* Now set the ctx descriptor fields */
-               *cd_tunneling |= (outer_l3_len >> 2) <<
+               *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
                                I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
-                               (l2_len >> 1) <<
+                               (tx_offload.l2_len >> 1) <<
                                I40E_TXD_CTX_QW0_NATLEN_SHIFT;

        } else
-               *td_offset |= (l2_len >> 1)
+               *td_offset |= (tx_offload.l2_len >> 1)
                        << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

        /* Enable L3 checksum offloads */
        if (ol_flags & PKT_TX_IP_CKSUM) {
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
-               *td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+               *td_offset |= (tx_offload.l3_len >> 2)
+                       << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
        } else if (ol_flags & PKT_TX_IPV4) {
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
-               *td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+               *td_offset |= (tx_offload.l3_len >> 2)
+                        << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
        } else if (ol_flags & PKT_TX_IPV6) {
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
-               *td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+               *td_offset |= (tx_offload.l3_len >> 2)
+                        << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+       }
+
+       if (ol_flags & PKT_TX_TCP_SEG) {
+               *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+               *td_offset |= (tx_offload.l4_len >> 2)
+                       << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+               return;
        }

        /* Enable L4 checksum offloads */
@@ -1154,7 +1161,7 @@ i40e_calc_context_desc(uint64_t flags)
 {
        uint64_t mask = 0ULL;

-       mask |= PKT_TX_OUTER_IP_CKSUM;
+       mask |= (PKT_TX_OUTER_IP_CKSUM | PKT_TX_TCP_SEG);

 #ifdef RTE_LIBRTE_IEEE1588
        mask |= PKT_TX_IEEE1588_TMST;
@@ -1165,6 +1172,41 @@ i40e_calc_context_desc(uint64_t flags)
        return 0;
 }

+/* set i40e TSO context descriptor */
+static inline uint64_t
+i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
+{
+
+       uint64_t ctx_desc = 0;
+       uint32_t cd_cmd, hdr_len, cd_tso_len;
+
+
+       if (!tx_offload.l4_len) {
+               PMD_DRV_LOG(DEBUG, "L4 length set to 0");
+               return ctx_desc;
+       }
+
+       /**
+        * in case of tunneling packet, the outer_l2_len and
+        * outer_l3_len must be 0.
+        */
+       hdr_len = tx_offload.outer_l2_len +
+               tx_offload.outer_l3_len +
+               tx_offload.l2_len +
+               tx_offload.l3_len +
+               tx_offload.l4_len;
+
+       cd_cmd = I40E_TX_CTX_DESC_TSO;
+       cd_tso_len = mbuf->pkt_len - hdr_len;
+       ctx_desc |= ((uint64_t)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+                               ((uint64_t)cd_tso_len <<
+                                I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+                               ((uint64_t)mbuf->tso_segsz <<
+                               I40E_TXD_CTX_QW1_MSS_SHIFT);
+
+       return ctx_desc;
+}
+
 uint16_t
 i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -1183,15 +1225,12 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
        uint32_t tx_flags;
        uint32_t td_tag;
        uint64_t ol_flags;
-       uint8_t l2_len;
-       uint16_t l3_len;
-       uint8_t outer_l2_len;
-       uint16_t outer_l3_len;
        uint16_t nb_used;
        uint16_t nb_ctx;
        uint16_t tx_last;
        uint16_t slen;
        uint64_t buf_dma_addr;
+       union i40e_tx_offload tx_offload = { .data = 0 };

        txq = tx_queue;
        sw_ring = txq->sw_ring;
@@ -1213,10 +1252,12 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
                RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

                ol_flags = tx_pkt->ol_flags;
-               l2_len = tx_pkt->l2_len;
-               l3_len = tx_pkt->l3_len;
-               outer_l2_len = tx_pkt->outer_l2_len;
-               outer_l3_len = tx_pkt->outer_l3_len;
+               tx_offload.l2_len = tx_pkt->l2_len;
+               tx_offload.l3_len = tx_pkt->l3_len;
+               tx_offload.l4_len = tx_pkt->l4_len;
+               tx_offload.tso_segsz = tx_pkt->tso_segsz;
+               tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
+               tx_offload.outer_l3_len = tx_pkt->outer_l3_len;

                /* Calculate the number of context descriptors needed. */
                nb_ctx = i40e_calc_context_desc(ol_flags);
@@ -1267,9 +1308,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts)
                cd_tunneling_params = 0;
                if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) {
                        i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset,
-                               l2_len, l3_len, outer_l2_len,
-                               outer_l3_len,
-                               &cd_tunneling_params);
+                               tx_offload, &cd_tunneling_params);
                }

                if (unlikely(nb_ctx)) {
@@ -1287,12 +1326,18 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf 
**tx_pkts, uint16_t nb_pkts)
                                rte_pktmbuf_free_seg(txe->mbuf);
                                txe->mbuf = NULL;
                        }
-#ifdef RTE_LIBRTE_IEEE1588
-                       if (ol_flags & PKT_TX_IEEE1588_TMST)
+                       /* TSO enabled means no timestamp */
+                       if (ol_flags & PKT_TX_TCP_SEG) {
                                cd_type_cmd_tso_mss |=
-                                       ((uint64_t)I40E_TX_CTX_DESC_TSYN <<
-                                               I40E_TXD_CTX_QW1_CMD_SHIFT);
+                                       i40e_set_tso_ctx(tx_pkt, tx_offload);
+                       } else {
+#ifdef RTE_LIBRTE_IEEE1588
+                               if (ol_flags & PKT_TX_IEEE1588_TMST)
+                                       cd_type_cmd_tso_mss |=
+                                               
((uint64_t)I40E_TX_CTX_DESC_TSYN <<
+                                                       
I40E_TXD_CTX_QW1_CMD_SHIFT);
 #endif
+                       }
                        ctx_txd->tunneling_params =
                                rte_cpu_to_le_32(cd_tunneling_params);
                        ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.h b/lib/librte_pmd_i40e/i40e_rxtx.h
index af932e3..f1033ef 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.h
+++ b/lib/librte_pmd_i40e/i40e_rxtx.h
@@ -154,6 +154,19 @@ struct i40e_tx_queue {
        bool tx_deferred_start; /**< don't start this queue in dev start */
 };

+/** Offload features */
+union i40e_tx_offload {
+       uint64_t data;
+       struct {
+               uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+               uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+               uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+               uint64_t tso_segsz:16; /**< TCP TSO segment size */
+               uint64_t outer_l2_len:8; /**< L2 outer Header Length */
+               uint64_t outer_l3_len:16; /**< L3 outer Header Length */
+       };
+};
+
 int i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
-- 
1.7.7.6

Reply via email to