Hi,
This diff implements TCP Segmentation Offload (TSO) for ixl(4). I tested
it successfully on amd64 and sparc64 with an Intel X710. It should
increase the TCP bulk performance to 10 Gbit/s. On sparc64 I got an
increase from 600 Mbit/s to 2.000 Gbit/s.
Further testing is welcome.
bye,
Jan
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.89
diff -u -p -r1.89 if_ixl.c
--- dev/pci/if_ixl.c 29 Sep 2023 19:44:47 -0000 1.89
+++ dev/pci/if_ixl.c 18 Oct 2023 15:15:30 -0000
@@ -71,6 +71,7 @@
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
+#include <net/route.h>
#include <net/toeplitz.h>
#if NBPFILTER > 0
@@ -85,6 +86,8 @@
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/if_ether.h>
@@ -827,6 +830,10 @@ struct ixl_tx_desc {
#define IXL_TX_DESC_BSIZE_MASK \
(IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT)
+#define IXL_TX_CTX_DESC_CMD_TSO 0x10
+#define IXL_TX_CTX_DESC_TLEN_SHIFT 30
+#define IXL_TX_CTX_DESC_MSS_SHIFT 50
+
#define IXL_TX_DESC_L2TAG1_SHIFT 48
} __packed __aligned(16);
@@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 {
uint64_t qword3;
} __packed __aligned(16);
-#define IXL_TX_PKT_DESCS 8
+#define IXL_TX_PKT_DESCS 32
#define IXL_TX_QUEUE_ALIGN 128
#define IXL_RX_QUEUE_ALIGN 128
#define IXL_HARDMTU 9712 /* 9726 - ETHER_HDR_LEN */
+#define IXL_TSO_SIZE ((255 * 1024) - 1)
+#define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1)
+
+/*
+ * Our TCP/IP stack cannot handle packets larger than MAXMCLBYTES.
+ * This interface cannot handle packets larger than IXL_TSO_SIZE.
+ */
+CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE);
#define IXL_PCIREG PCI_MAPREG_START
@@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct
ifp->if_capabilities |= IFCAP_CSUM_IPv4 |
IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+ ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status);
@@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
txm = &maps[i];
if (bus_dmamap_create(sc->sc_dmat,
- IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0,
+ MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
&txm->txm_map) != 0)
goto uncreate;
@@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
}
static uint64_t
-ixl_tx_setup_offload(struct mbuf *m0)
+ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr,
+ unsigned int prod)
{
struct ether_extracted ext;
uint64_t hlen;
@@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0)
}
if (!ISSET(m0->m_pkthdr.csum_flags,
- M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
+ M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO))
return (offload);
ether_extract_headers(m0, &ext);
@@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0)
offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
}
+ if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ if (ext.tcp) {
+ struct ixl_tx_desc *ring, *txd;
+ uint64_t cmd = 0;
+
+ hlen += ext.tcp->th_off << 2;
+ ring = IXL_DMA_KVA(&txr->txr_mem);
+ txd = &ring[prod];
+
+ cmd |= IXL_TX_DESC_DTYPE_CONTEXT;
+ cmd |= IXL_TX_CTX_DESC_CMD_TSO;
+ cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN
+ - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT;
+ cmd |= (uint64_t)(m0->m_pkthdr.ph_mss)
+ << IXL_TX_CTX_DESC_MSS_SHIFT;
+
+ htolem64(&txd->addr, 0);
+ htolem64(&txd->cmd, cmd);
+ } else
+ tcpstat_inc(tcps_outbadtso);
+ }
+
return (offload);
}
@@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq)
mask = sc->sc_tx_ring_ndescs - 1;
for (;;) {
- if (free <= IXL_TX_PKT_DESCS) {
+ /* We need one extra descriptor for TSO packets. */
+ if (free <= (IXL_TX_PKT_DESCS + 1)) {
ifq_set_oactive(ifq);
break;
}
@@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq)
if (m == NULL)
break;
- offload = ixl_tx_setup_offload(m);
+ offload = ixl_tx_setup_offload(m, txr, prod);
txm = &txr->txr_maps[prod];
map = txm->txm_map;
+
+ if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ prod++;
+ prod &= mask;
+ free--;
+ }
if (ixl_load_mbuf(sc->sc_dmat, map, m) != 0) {
ifq->ifq_errors++;