Hi Wenzhuo,
> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Wenzhuo Lu
> Sent: Thursday, January 14, 2016 1:39 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX checksum off-load
>
> The patch adds VxLAN & NVGRE TX checksum off-load. When the flag of
> outer IP header checksum offload is set, we'll set the context
> descriptor to enable this checksum off-load.
>
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
> ---
> drivers/net/ixgbe/ixgbe_rxtx.c | 52 ++++++++++++++++++++++++++++++++++--------
> drivers/net/ixgbe/ixgbe_rxtx.h | 6 ++++-
> lib/librte_mbuf/rte_mbuf.h | 2 ++
> 3 files changed, 49 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 512ac3a..fea2495 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -85,7 +85,8 @@
> PKT_TX_VLAN_PKT | \
> PKT_TX_IP_CKSUM | \
> PKT_TX_L4_MASK | \
> - PKT_TX_TCP_SEG)
> + PKT_TX_TCP_SEG | \
> + PKT_TX_OUTER_IP_CKSUM)
I think you also need to update dev_info.tx_offload_capa;
I couldn't find where you do it.
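Something like the below is what I'd expect (just a sketch; I am assuming the
existing DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM capability bit is the right one to
advertise here, and that it should be limited to the MAC types that actually
support the feature):

	/* sketch only: inside ixgbe_dev_info_get() */
	struct ixgbe_hw *hw =
		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* advertise outer IP checksum offload for the X550 family only */
	if (hw->mac.type == ixgbe_mac_X550 ||
	    hw->mac.type == ixgbe_mac_X550EM_x)
		dev_info->tx_offload_capa |=
			DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;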
>
> static inline struct rte_mbuf *
> rte_rxmbuf_alloc(struct rte_mempool *mp)
> @@ -364,9 +365,11 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
> uint32_t ctx_idx;
> uint32_t vlan_macip_lens;
> union ixgbe_tx_offload tx_offload_mask;
> + uint32_t seqnum_seed = 0;
>
> ctx_idx = txq->ctx_curr;
> - tx_offload_mask.data = 0;
> + tx_offload_mask.data[0] = 0;
> + tx_offload_mask.data[1] = 0;
> type_tucmd_mlhl = 0;
>
> /* Specify which HW CTX to upload. */
> @@ -430,9 +433,20 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
> }
> }
>
> + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> + tx_offload_mask.outer_l3_len |= ~0;
> + tx_offload_mask.outer_l2_len |= ~0;
> + seqnum_seed |= tx_offload.outer_l3_len
> + << IXGBE_ADVTXD_OUTER_IPLEN;
> + seqnum_seed |= tx_offload.outer_l2_len
> + << IXGBE_ADVTXD_TUNNEL_LEN;
> + }
I don't have an X550 card to hand, but reading through the datasheet, this
doesn't seem right.
When OUTER_IP_CKSUM is enabled, MACLEN becomes outer_l2_len and
TUNNEL_LEN should be: outer_l4_len + tunnel_hdr_len + inner_l2_len.
So I think that in our case TUNNEL_LEN should be set to l2_len.
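I.e. something like the below (just a sketch, assuming the usual mbuf
convention that for tunnel packets l2_len covers everything between the outer
L3 header and the inner L3 header, i.e. outer L4 + tunnel header + inner L2):

	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
		tx_offload_mask.outer_l2_len |= ~0;
		tx_offload_mask.outer_l3_len |= ~0;
		tx_offload_mask.l2_len |= ~0;
		/* outer L3 length goes into the OUTER_IPLEN field... */
		seqnum_seed |= tx_offload.outer_l3_len
				<< IXGBE_ADVTXD_OUTER_IPLEN;
		/* ...while TUNNEL_LEN takes l2_len, not outer_l2_len */
		seqnum_seed |= tx_offload.l2_len
				<< IXGBE_ADVTXD_TUNNEL_LEN;
	}

MACLEN in vlan_macip_lens would then have to be built from outer_l2_len
rather than l2_len for such packets.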
> +
> txq->ctx_cache[ctx_idx].flags = ol_flags;
> - txq->ctx_cache[ctx_idx].tx_offload.data =
> - tx_offload_mask.data & tx_offload.data;
> + txq->ctx_cache[ctx_idx].tx_offload.data[0] =
> + tx_offload_mask.data[0] & tx_offload.data[0];
> + txq->ctx_cache[ctx_idx].tx_offload.data[1] =
> + tx_offload_mask.data[1] & tx_offload.data[1];
> txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
>
> ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
> @@ -441,7 +455,7 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
> vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci <<
> IXGBE_ADVTXD_VLAN_SHIFT);
> ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
> ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
> - ctx_txd->seqnum_seed = 0;
> + ctx_txd->seqnum_seed = seqnum_seed;
> }
>
> /*
> @@ -454,16 +468,24 @@ what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
> {
> /* If match with the current used context */
> if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> - (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> - (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> + (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> + (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> + & tx_offload.data[0])) &&
> + (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> + (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> + & tx_offload.data[1])))) {
> return txq->ctx_curr;
> }
>
> /* What if match with the next context */
> txq->ctx_curr ^= 1;
> if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> - (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> - (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> + (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> + (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> + & tx_offload.data[0])) &&
> + (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> + (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> + & tx_offload.data[1])))) {
> return txq->ctx_curr;
> }
>
> @@ -492,6 +514,12 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
> cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
> if (ol_flags & PKT_TX_TCP_SEG)
> cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
> + if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> + cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
> + if (ol_flags & PKT_TX_VXLAN_PKT)
> + cmdtype &= ~(1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
> + else
> + cmdtype |= (1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
> return cmdtype;
> }
>
> @@ -588,8 +616,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint64_t tx_ol_req;
> uint32_t ctx = 0;
> uint32_t new_ctx;
> - union ixgbe_tx_offload tx_offload = {0};
> + union ixgbe_tx_offload tx_offload;
>
> + tx_offload.data[0] = 0;
> + tx_offload.data[1] = 0;
> txq = tx_queue;
> sw_ring = txq->sw_ring;
> txr = txq->tx_ring;
> @@ -623,6 +653,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> tx_offload.l4_len = tx_pkt->l4_len;
> tx_offload.vlan_tci = tx_pkt->vlan_tci;
> tx_offload.tso_segsz = tx_pkt->tso_segsz;
> + tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
> + tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
>
> /* If new context need be built or reuse the exist ctx.
> */
> ctx = what_advctx_update(txq, tx_ol_req,
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 475a800..c15f9fa 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -163,7 +163,7 @@ enum ixgbe_advctx_num {
>
> /** Offload features */
> union ixgbe_tx_offload {
> - uint64_t data;
> + uint64_t data[2];
I wonder, is there any performance impact from increasing the size of tx_offload?
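If it does show up, one way to keep the context-match check down to a single
branch might be something like the below (untested, just a thought):

	/* fold both 64-bit comparisons into one branch */
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		((txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ^
		  (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0] &
		   tx_offload.data[0])) |
		 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ^
		  (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1] &
		   tx_offload.data[1]))) == 0)) {
		return txq->ctx_curr;
	}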
> struct {
> uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> @@ -171,6 +171,10 @@ union ixgbe_tx_offload {
> uint64_t tso_segsz:16; /**< TCP TSO segment size */
> uint64_t vlan_tci:16;
> /**< VLAN Tag Control Identifier (CPU order). */
> +
> + /* fields for TX offloading of tunnels */
> + uint64_t outer_l3_len:8; /**< Outer L3 (IP) Hdr Length. */
> + uint64_t outer_l2_len:8; /**< Outer L2 (MAC) Hdr Length. */
> };
> };
>
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 5ad5e59..1bda00e 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -103,6 +103,8 @@ extern "C" {
>
> /* add new TX flags here */
>
> +#define PKT_TX_VXLAN_PKT (1ULL << 48) /**< TX packet is a VxLAN packet. */
> +
From reading the X550 spec, I don't really understand why we need to specify
whether it is a GRE or a VXLAN packet, so probably we don't need that flag
at all for now?
If we really do, it might be worth organising it like PKT_TX_L4_CKSUM (as an
enum) and reserving a few values for future expansion (2 or 3 bits?).
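E.g. something along these lines (just a sketch, the names are made up):

	#define PKT_TX_TUNNEL_VXLAN  (1ULL << 48) /**< TX packet is VxLAN. */
	#define PKT_TX_TUNNEL_GRE    (2ULL << 48) /**< TX packet is GRE. */
	/* values 3..7 reserved for future tunnel types */
	#define PKT_TX_TUNNEL_MASK   (7ULL << 48)

That way one field can cover several tunnel types and still leave room to grow.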
Konstantin