This patch enables the i40e TSO feature for both non-tunneling and UDP
tunneling packets.
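
Below is a minimal illustrative sketch (not part of this patch; the
helper name, header sizes and MSS value are assumptions) of how an
application could request TSO on a plain TCP/IPv4 mbuf using the flags
this patch handles:

    /* Hypothetical helper: prepare a non-tunneled mbuf for TSO. */
    static void
    app_setup_tso(struct rte_mbuf *m)
    {
            m->ol_flags |= PKT_TX_IPV4_CSUM | PKT_TX_TCP_SEG;
            m->l2_len = 14;       /* Ethernet header */
            m->l3_len = 20;       /* IPv4 header without options */
            m->l4_len = 20;       /* TCP header without options */
            m->tso_segsz = 1460;  /* requested MSS per output segment */
    }

The driver turns this into a single context descriptor carrying the TSO
payload length and MSS (see i40e_set_tso_ctx() below).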

Signed-off-by: Jijiang Liu <jijiang.liu at intel.com>
Signed-off-by: Miroslaw Walukiewicz <miroslaw.walukiewicz at intel.com>
---
 lib/librte_pmd_i40e/i40e_ethdev.c |    3 +-
 lib/librte_pmd_i40e/i40e_rxtx.c   |  111 +++++++++++++++++++++++++++---------
 lib/librte_pmd_i40e/i40e_rxtx.h   |   13 ++++
 3 files changed, 98 insertions(+), 29 deletions(-)
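
A corresponding sketch for the UDP tunneling case (the VXLAN header
sizes below are assumptions for illustration; the expected meaning of
l2_len on tunneled packets is described in i40e_set_tso_ctx()):

    /* Hypothetical helper: prepare a VXLAN-encapsulated mbuf for TSO. */
    static void
    app_setup_tunnel_tso(struct rte_mbuf *m)
    {
            m->ol_flags |= PKT_TX_UDP_TUNNEL_PKT | PKT_TX_OUTER_IP_CKSUM |
                           PKT_TX_IPV4_CSUM | PKT_TX_TCP_SEG;
            m->outer_l2_len = 14;    /* outer Ethernet header */
            m->outer_l3_len = 20;    /* outer IPv4 header */
            m->l2_len = 8 + 8 + 14;  /* outer UDP + VXLAN + inner Ethernet */
            m->l3_len = 20;          /* inner IPv4 header */
            m->l4_len = 20;          /* inner TCP header */
            m->tso_segsz = 1400;     /* assumed inner MSS */
    }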

diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c b/lib/librte_pmd_i40e/i40e_ethdev.c
index b47a3d2..af95296 100644
--- a/lib/librte_pmd_i40e/i40e_ethdev.c
+++ b/lib/librte_pmd_i40e/i40e_ethdev.c
@@ -1516,7 +1516,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_TX_OFFLOAD_IPV4_CKSUM |
                DEV_TX_OFFLOAD_UDP_CKSUM |
                DEV_TX_OFFLOAD_TCP_CKSUM |
-               DEV_TX_OFFLOAD_SCTP_CKSUM;
+               DEV_TX_OFFLOAD_SCTP_CKSUM |
+               DEV_TX_OFFLOAD_TCP_TSO;
        dev_info->reta_size = pf->hash_lut_size;

        dev_info->default_rxconf = (struct rte_eth_rxconf) {
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 2beae3c..529ffb2 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -460,18 +460,15 @@ static inline void
 i40e_txd_enable_checksum(uint64_t ol_flags,
                        uint32_t *td_cmd,
                        uint32_t *td_offset,
-                       uint8_t l2_len,
-                       uint16_t l3_len,
-                       uint8_t outer_l2_len,
-                       uint16_t outer_l3_len,
+                       union i40e_tx_offload tx_offload,
                        uint32_t *cd_tunneling)
 {
-       if (!l2_len) {
+       if (!tx_offload.l2_len) {
                PMD_DRV_LOG(DEBUG, "L2 length set to 0");
                return;
        }

-       if (!l3_len) {
+       if (!tx_offload.l3_len) {
                PMD_DRV_LOG(DEBUG, "L3 length set to 0");
                return;
        }
@@ -479,7 +476,7 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
        /* UDP tunneling packet TX checksum offload */
        if (unlikely(ol_flags & PKT_TX_UDP_TUNNEL_PKT)) {

-               *td_offset |= (outer_l2_len >> 1)
+               *td_offset |= (tx_offload.outer_l2_len >> 1)
                                << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

                if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
@@ -490,26 +487,36 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
                        *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;

                /* Now set the ctx descriptor fields */
-               *cd_tunneling |= (outer_l3_len >> 2) <<
+               *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
                                I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
                                I40E_TXD_CTX_UDP_TUNNELING |
-                               (l2_len >> 1) <<
+                               (tx_offload.l2_len >> 1) <<
                                I40E_TXD_CTX_QW0_NATLEN_SHIFT;

        } else
-               *td_offset |= (l2_len >> 1)
+               *td_offset |= (tx_offload.l2_len >> 1)
                        << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

        /* Enable L3 checksum offloads */
        if (ol_flags & PKT_TX_IPV4_CSUM) {
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
-               *td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+               *td_offset |= (tx_offload.l3_len >> 2)
+                               << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
        } else if (ol_flags & PKT_TX_IPV4) {
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
-               *td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+               *td_offset |= (tx_offload.l3_len >> 2)
+                               << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
        } else if (ol_flags & PKT_TX_IPV6) {
                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
-               *td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+               *td_offset |= (tx_offload.l3_len >> 2)
+                               << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+       }
+
+       if (ol_flags & PKT_TX_TCP_SEG) {
+               *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+               *td_offset |= (tx_offload.l4_len >> 2)
+                               << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+               return;
        }

        /* Enable L4 checksum offloads */
@@ -1160,8 +1167,11 @@ i40e_calc_context_desc(uint64_t flags)
 {
        uint64_t mask = 0ULL;

-       if (flags | PKT_TX_UDP_TUNNEL_PKT)
+       if (flags & PKT_TX_UDP_TUNNEL_PKT)
                mask |= PKT_TX_UDP_TUNNEL_PKT;
+       if (flags & PKT_TX_TCP_SEG)
+               /* a context descriptor is needed when TSO is enabled */
+               mask |= PKT_TX_TCP_SEG;

 #ifdef RTE_LIBRTE_IEEE1588
        mask |= PKT_TX_IEEE1588_TMST;
@@ -1172,6 +1182,47 @@ i40e_calc_context_desc(uint64_t flags)
        return 0;
 }

+/* set i40e TSO context descriptor */
+static inline uint64_t
+i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
+{
+       uint64_t ctx_desc = 0;
+       uint32_t cd_cmd, hdr_len, cd_tso_len;
+
+       if (!tx_offload.l4_len) {
+               PMD_DRV_LOG(DEBUG, "L4 length set to 0");
+               return ctx_desc;
+       }
+
+       if (unlikely(mbuf->ol_flags & PKT_TX_UDP_TUNNEL_PKT)) {
+
+               /**
+                * Calculate the total header length of a UDP tunneling
+                * packet: here l2_len is the outer UDP header length plus
+                * the tunnel header length plus the inner L2 header length.
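+                * Example (assumed sizes): 14 (outer MAC) + 20 (outer IPv4)
+                * + 30 (outer UDP+VXLAN+inner MAC) + 20 + 20 = 104 bytes.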
+                */
+               hdr_len = tx_offload.outer_l2_len +
+                               tx_offload.outer_l3_len +
+                               tx_offload.l2_len +
+                               tx_offload.l3_len +
+                               tx_offload.l4_len;
+       } else
+               hdr_len = tx_offload.l2_len + tx_offload.l3_len +
+                               tx_offload.l4_len;
+
+       cd_cmd = I40E_TX_CTX_DESC_TSO;
+       cd_tso_len = mbuf->pkt_len - hdr_len;
+       ctx_desc |= ((uint64_t)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+                               ((uint64_t)cd_tso_len <<
+                                I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+                               ((uint64_t)mbuf->tso_segsz <<
+                               I40E_TXD_CTX_QW1_MSS_SHIFT);
+
+       return ctx_desc;
+}
+
 uint16_t
 i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -1190,15 +1241,12 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        uint32_t tx_flags;
        uint32_t td_tag;
        uint64_t ol_flags;
-       uint8_t l2_len;
-       uint16_t l3_len;
-       uint8_t outer_l2_len;
-       uint16_t outer_l3_len;
        uint16_t nb_used;
        uint16_t nb_ctx;
        uint16_t tx_last;
        uint16_t slen;
        uint64_t buf_dma_addr;
+       union i40e_tx_offload tx_offload = { .data = 0 };

        txq = tx_queue;
        sw_ring = txq->sw_ring;
@@ -1220,10 +1268,12 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

                ol_flags = tx_pkt->ol_flags;
-               l2_len = tx_pkt->l2_len;
-               l3_len = tx_pkt->l3_len;
-               outer_l2_len = tx_pkt->outer_l2_len;
-               outer_l3_len = tx_pkt->outer_l3_len;
+               tx_offload.l2_len = tx_pkt->l2_len;
+               tx_offload.l3_len = tx_pkt->l3_len;
+               tx_offload.l4_len = tx_pkt->l4_len;
+               tx_offload.tso_segsz = tx_pkt->tso_segsz;
+               tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
+               tx_offload.outer_l3_len = tx_pkt->outer_l3_len;

                /* Calculate the number of context descriptors needed. */
                nb_ctx = i40e_calc_context_desc(ol_flags);
@@ -1273,8 +1323,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                /* Enable checksum offloading */
                cd_tunneling_params = 0;
                i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset,
-                                               l2_len, l3_len, outer_l2_len,
-                                               outer_l3_len,
+                                               tx_offload,
                                                &cd_tunneling_params);

                if (unlikely(nb_ctx)) {
@@ -1292,12 +1341,18 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                rte_pktmbuf_free_seg(txe->mbuf);
                                txe->mbuf = NULL;
                        }
-#ifdef RTE_LIBRTE_IEEE1588
-                       if (ol_flags & PKT_TX_IEEE1588_TMST)
+                       /* TSO enabled means no timestamp */
+                       if (ol_flags & PKT_TX_TCP_SEG) {
                                cd_type_cmd_tso_mss |=
-                                       ((uint64_t)I40E_TX_CTX_DESC_TSYN <<
-                                               I40E_TXD_CTX_QW1_CMD_SHIFT);
+                                       i40e_set_tso_ctx(tx_pkt, tx_offload);
+                       } else {
+#ifdef RTE_LIBRTE_IEEE1588
+                               if (ol_flags & PKT_TX_IEEE1588_TMST)
+                                       cd_type_cmd_tso_mss |=
+                                               ((uint64_t)I40E_TX_CTX_DESC_TSYN <<
+                                                       I40E_TXD_CTX_QW1_CMD_SHIFT);
 #endif
+                       }
                        ctx_txd->tunneling_params =
                                rte_cpu_to_le_32(cd_tunneling_params);
                        ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.h b/lib/librte_pmd_i40e/i40e_rxtx.h
index af932e3..4c81a24 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.h
+++ b/lib/librte_pmd_i40e/i40e_rxtx.h
@@ -154,6 +154,19 @@ struct i40e_tx_queue {
        bool tx_deferred_start; /**< don't start this queue in dev start */
 };

+/** Offload features */
+union i40e_tx_offload {
+       uint64_t data;
+       struct {
+               uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+               uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+               uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+               uint64_t tso_segsz:16; /**< TCP TSO segment size. */
+               uint64_t outer_l2_len:8; /**< outer L2 Header Length. */
+               uint64_t outer_l3_len:16; /**< outer L3 Header Length. */
+       };
+};
+
 int i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
-- 
1.7.7.6
