> -----Original Message----- > From: Olivier Matz [mailto:olivier.matz at 6wind.com] > Sent: Monday, November 10, 2014 3:59 PM > To: dev at dpdk.org > Cc: olivier.matz at 6wind.com; Walukiewicz, Miroslaw; Liu, Jijiang; Liu, > Yong; jigsaw at gmail.com; Richardson, Bruce; Ananyev, Konstantin > Subject: [PATCH 07/12] mbuf: generic support for TCP segmentation offload > > Some of the NICs supported by DPDK have a possibility to accelerate TCP > traffic by using segmentation offload. The application prepares a packet > with valid TCP header with size up to 64K and delegates the > segmentation to the NIC. > > Implement the generic part of TCP segmentation offload in rte_mbuf. It > introduces 2 new fields in rte_mbuf: l4_len (length of L4 header in bytes) > and tso_segsz (MSS of packets). > > To delegate the TCP segmentation to the hardware, the user has to: > > - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies > PKT_TX_TCP_CKSUM) > - set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum to 0 in > the packet > - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz > - calculate the pseudo header checksum and set it in the TCP header, > as required when doing hardware TCP checksum offload > > The API is inspired by ixgbe hardware (the next commit adds the > support for ixgbe), but it seems generic enough to be used for other > hw/drivers in the future. > > This commit also reworks the way l2_len and l3_len are used in igb > and ixgbe drivers as the l2_l3_len is not available anymore in mbuf. 
> > Signed-off-by: Mirek Walukiewicz <miroslaw.walukiewicz at intel.com> > Signed-off-by: Olivier Matz <olivier.matz at 6wind.com> > --- > app/test-pmd/testpmd.c | 3 ++- > examples/ipv4_multicast/main.c | 3 ++- > lib/librte_mbuf/rte_mbuf.h | 44 > +++++++++++++++++++++++---------------- > lib/librte_pmd_e1000/igb_rxtx.c | 11 +++++++++- > lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +++++++++- > 5 files changed, 50 insertions(+), 22 deletions(-) > > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c > index 12adafa..a831e31 100644 > --- a/app/test-pmd/testpmd.c > +++ b/app/test-pmd/testpmd.c > @@ -408,7 +408,8 @@ testpmd_mbuf_ctor(struct rte_mempool *mp, > mb->ol_flags = 0; > mb->data_off = RTE_PKTMBUF_HEADROOM; > mb->nb_segs = 1; > - mb->l2_l3_len = 0; > + mb->l2_len = 0; > + mb->l3_len = 0; > mb->vlan_tci = 0; > mb->hash.rss = 0; > } > diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c > index de5e6be..a31d43d 100644 > --- a/examples/ipv4_multicast/main.c > +++ b/examples/ipv4_multicast/main.c > @@ -302,7 +302,8 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone) > /* copy metadata from source packet*/ > hdr->port = pkt->port; > hdr->vlan_tci = pkt->vlan_tci; > - hdr->l2_l3_len = pkt->l2_l3_len; > + hdr->l2_len = pkt->l2_len; > + hdr->l3_len = pkt->l3_len; > hdr->hash = pkt->hash; > > hdr->ol_flags = pkt->ol_flags; > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h > index bcd8996..f76b768 100644 > --- a/lib/librte_mbuf/rte_mbuf.h > +++ b/lib/librte_mbuf/rte_mbuf.h > @@ -126,6 +126,19 @@ extern "C" { > > #define PKT_TX_VXLAN_CKSUM (1ULL << 50) /**< TX checksum of VXLAN computed > by NIC */ > > +/** > + * TCP segmentation offload. 
To enable this offload feature for a > + * packet to be transmitted on hardware supporting TSO: > + * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies > + * PKT_TX_TCP_CKSUM) > + * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum > + * to 0 in the packet > + * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz > + * - calculate the pseudo header checksum and set it in the TCP header, > + * as required when doing hardware TCP checksum offload > + */ > +#define PKT_TX_TCP_SEG (1ULL << 49) > + > /* Use final bit of flags to indicate a control mbuf */ > #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ > > @@ -185,6 +198,7 @@ static inline const char > *rte_get_tx_ol_flag_name(uint64_t mask) > case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM"; > case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST"; > case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM"; > + case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG"; > default: return NULL; > } > } > @@ -264,22 +278,18 @@ struct rte_mbuf { > > /* fields to support TX offloads */ > union { > - uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */ > + uint64_t tx_offload; /**< combined for easy fetch */ > struct { > - uint16_t l3_len:9; /**< L3 (IP) Header Length. */ > - uint16_t l2_len:7; /**< L2 (MAC) Header Length. */ > - }; > - }; > + uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ > + uint64_t l3_len:9; /**< L3 (IP) Header Length. */ > + uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ > + uint64_t tso_segsz:16; /**< TCP TSO segment size */ > > - /* fields for TX offloading of tunnels */ > - union { > - uint16_t inner_l2_l3_len; > - /**< combined inner l2/l3 lengths as single var */ > - struct { > - uint16_t inner_l3_len:9; > - /**< inner L3 (IP) Header Length. */ > - uint16_t inner_l2_len:7; > - /**< inner L2 (MAC) Header Length. 
*/ > + /* fields for TX offloading of tunnels */ > + uint16_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. > */ > + uint16_t inner_l2_len:7; /**< inner L2 (MAC) Hdr > Length. */ > +
Shouldn't these 2 fields be bit fields of uint64_t too? uint64_t inner_l3_len:9; uint64_t inner_l2_len:7; So it fits into one uint64_t? Konstantin > + /* uint64_t unused:8; */ > }; > }; > } __rte_cache_aligned; > @@ -631,8 +641,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m) > { > m->next = NULL; > m->pkt_len = 0; > - m->l2_l3_len = 0; > - m->inner_l2_l3_len = 0; > + m->tx_offload = 0; > m->vlan_tci = 0; > m->nb_segs = 1; > m->port = 0xff; > @@ -701,8 +710,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf > *mi, struct rte_mbuf *md) > mi->data_len = md->data_len; > mi->port = md->port; > mi->vlan_tci = md->vlan_tci; > - mi->l2_l3_len = md->l2_l3_len; > - mi->inner_l2_l3_len = md->inner_l2_l3_len; > + mi->tx_offload = md->tx_offload; > mi->hash = md->hash; > > mi->next = NULL; > diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c > index dbf5074..0a9447e 100644 > --- a/lib/librte_pmd_e1000/igb_rxtx.c > +++ b/lib/librte_pmd_e1000/igb_rxtx.c > @@ -361,6 +361,13 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf > **tx_pkts, > struct rte_mbuf *tx_pkt; > struct rte_mbuf *m_seg; > union igb_vlan_macip vlan_macip_lens; > + union { > + uint16_t u16; > + struct { > + uint16_t l3_len:9; > + uint16_t l2_len:7; > + }; > + } l2_l3_len; > uint64_t buf_dma_addr; > uint32_t olinfo_status; > uint32_t cmd_type_len; > @@ -398,8 +405,10 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf > **tx_pkts, > tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1); > > ol_flags = tx_pkt->ol_flags; > + l2_l3_len.l2_len = tx_pkt->l2_len; > + l2_l3_len.l3_len = tx_pkt->l3_len; > vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci; > - vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len; > + vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16; > tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | > PKT_TX_L4_MASK); > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c > index 70ca254..54a0fc1 100644 > ---
a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c > @@ -540,6 +540,13 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf > **tx_pkts, > struct rte_mbuf *tx_pkt; > struct rte_mbuf *m_seg; > union ixgbe_vlan_macip vlan_macip_lens; > + union { > + uint16_t u16; > + struct { > + uint16_t l3_len:9; > + uint16_t l2_len:7; > + }; > + } l2_l3_len; > uint64_t buf_dma_addr; > uint32_t olinfo_status; > uint32_t cmd_type_len; > @@ -583,8 +590,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf > **tx_pkts, > tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | > PKT_TX_L4_MASK); > if (tx_ol_req) { > + l2_l3_len.l2_len = tx_pkt->l2_len; > + l2_l3_len.l3_len = tx_pkt->l3_len; > vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci; > - vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len; > + vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16; > > /* If new context need be built or reuse the exist ctx. > */ > ctx = what_advctx_update(txq, tx_ol_req, > -- > 2.1.0