On Sat, May 13, 2023 at 01:32:07AM +0200, Alexander Bluhm wrote:
> I have not yet investigated where the dropped counter 83 comes from.
> If you see that also, please report what you did.
The dropped counter comes from an ENOBUFS error in this chunk:
/* network interface hardware will do TSO */
if (in_ifcap_cksum(*mp, ifp, ifcap)) {
if (ISSET(ifcap, IFCAP_TSOv4)) {
in_hdr_cksum_out(*mp, ifp);
in_proto_cksum_out(*mp, ifp);
}
if (ISSET(ifcap, IFCAP_TSOv6))
in6_proto_cksum_out(*mp, ifp);
if ((error = ifp->if_output(ifp, *mp, dst, rt))) {
tcpstat_inc(tcps_outbadtso);
goto done;
}
tcpstat_inc(tcps_outhwtso);
goto done;
}
As the error from ifp->if_output() has nothing to do with TSO, I
have removed the counting there.
Updated diff below; please test it if you have ix(4) interfaces doing TCP
output.
bluhm
Index: dev/pci/if_ix.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.193
diff -u -p -r1.193 if_ix.c
--- dev/pci/if_ix.c 28 Apr 2023 10:18:57 -0000 1.193
+++ dev/pci/if_ix.c 14 May 2023 09:11:33 -0000
@@ -1924,8 +1924,9 @@ ixgbe_setup_interface(struct ix_softc *s
ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
ifp->if_capabilities |= IFCAP_CSUM_IPv4;
+ ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
if (sc->hw.mac.type != ixgbe_mac_82598EB)
- ifp->if_capabilities |= IFCAP_TSO;
+ ifp->if_capabilities |= IFCAP_LRO;
/*
* Specify the media types supported by this sc and register
@@ -2344,6 +2345,7 @@ ixgbe_initialize_transmit_units(struct i
int i;
uint64_t tdba;
uint32_t txctrl;
+ uint32_t hlreg;
/* Setup the Base and Length of the Tx Descriptor Ring */
@@ -2405,6 +2407,11 @@ ixgbe_initialize_transmit_units(struct i
rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
}
+
+ /* Enable TCP/UDP padding when using TSO */
+ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ hlreg |= IXGBE_HLREG0_TXPADEN;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
}
/*********************************************************************
@@ -2473,16 +2480,18 @@ ixgbe_free_transmit_buffers(struct tx_ri
**********************************************************************/
static inline int
-ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
- uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
+ixgbe_tx_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
+ uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status, uint32_t *cmd_type_len,
+ uint32_t *mss_l4len_idx)
{
struct ether_extracted ext;
int offload = 0;
- uint32_t iphlen;
+ uint32_t ethlen, iphlen;
ether_extract_headers(mp, &ext);
+ ethlen = sizeof(*ext.eh);
- *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+ *vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT);
if (ext.ip4) {
iphlen = ext.ip4->ip_hl << 2;
@@ -2500,6 +2509,8 @@ ixgbe_csum_offload(struct mbuf *mp, uint
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
#endif
} else {
+ if (mp->m_pkthdr.csum_flags & M_TCP_TSO)
+ tcpstat_inc(tcps_outbadtso);
return offload;
}
@@ -2519,6 +2530,32 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
}
+ if (mp->m_pkthdr.csum_flags & M_TCP_TSO) {
+ if (ext.tcp) {
+ uint32_t pktlen, hdrlen, thlen, outlen;
+
+ thlen = ext.tcp->th_off << 2;
+
+ *mss_l4len_idx |= (uint32_t)(mp->m_pkthdr.ph_mss
+ << IXGBE_ADVTXD_MSS_SHIFT);
+ *mss_l4len_idx |= thlen << IXGBE_ADVTXD_L4LEN_SHIFT;
+
+ hdrlen = ethlen + iphlen + thlen;
+ pktlen = mp->m_pkthdr.len - hdrlen;
+ CLR(*olinfo_status, IXGBE_ADVTXD_PAYLEN_MASK
+ << IXGBE_ADVTXD_PAYLEN_SHIFT);
+ *olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT;
+
+ *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
+ offload = 1;
+
+ outlen = hdrlen + mp->m_pkthdr.ph_mss;
+ tcpstat_add(tcps_outpkttso,
+ (pktlen + outlen - 1) / outlen);
+ } else
+ tcpstat_inc(tcps_outbadtso);
+ }
+
return offload;
}
@@ -2529,6 +2566,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr,
struct ixgbe_adv_tx_context_desc *TXD;
struct ixgbe_tx_buf *tx_buffer;
uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
+ uint32_t mss_l4len_idx = 0;
int ctxd = txr->next_avail_desc;
int offload = 0;
@@ -2544,8 +2582,8 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr,
}
#endif
- offload |= ixgbe_csum_offload(mp, &vlan_macip_lens, &type_tucmd_mlhl,
- olinfo_status);
+ offload |= ixgbe_tx_offload(mp, &vlan_macip_lens, &type_tucmd_mlhl,
+ olinfo_status, cmd_type_len, &mss_l4len_idx);
if (!offload)
return (0);
@@ -2559,7 +2597,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr,
TXD->vlan_macip_lens = htole32(vlan_macip_lens);
TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
TXD->seqnum_seed = htole32(0);
- TXD->mss_l4len_idx = htole32(0);
+ TXD->mss_l4len_idx = htole32(mss_l4len_idx);
tx_buffer->m_head = NULL;
tx_buffer->eop_index = -1;
@@ -2868,18 +2906,20 @@ ixgbe_initialize_receive_units(struct ix
}
IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
- /* Always enable jumbo frame reception */
hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ /* Always enable jumbo frame reception */
hlreg |= IXGBE_HLREG0_JUMBOEN;
+ /* Always enable CRC stripping */
+ hlreg |= IXGBE_HLREG0_RXCRCSTRP;
IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
- if (ISSET(ifp->if_xflags, IFXF_TSO)) {
+ if (ISSET(ifp->if_xflags, IFXF_LRO)) {
rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
/* This field has to be set to zero. */
rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
- /* Enable TSO Receive Offloading */
+ /* RSC Coalescing on ACK Change */
rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
rdrxctl |= IXGBE_RDRXCTL_FCOE_WRFIX;
@@ -2902,10 +2942,10 @@ ixgbe_initialize_receive_units(struct ix
srrctl = bufsz | IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
- if (ISSET(ifp->if_xflags, IFXF_TSO)) {
+ if (ISSET(ifp->if_xflags, IFXF_LRO)) {
rdrxctl = IXGBE_READ_REG(&sc->hw, IXGBE_RSCCTL(i));
- /* Enable TSO Receive Side Coalescing */
+ /* Enable Receive Side Coalescing */
rdrxctl |= IXGBE_RSCCTL_RSCEN;
rdrxctl |= IXGBE_RSCCTL_MAXDESC_16;
@@ -3263,7 +3303,7 @@ ixgbe_setup_vlan_hw_support(struct ix_so
* We have to disable VLAN striping when using TCP offloading, due to a
* firmware bug.
*/
- if (ISSET(ifp->if_xflags, IFXF_TSO)) {
+ if (ISSET(ifp->if_xflags, IFXF_LRO)) {
sc->vlan_stripping = 0;
return;
}
Index: dev/pci/ixgbe.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/ixgbe.h,v
retrieving revision 1.33
diff -u -p -r1.33 ixgbe.h
--- dev/pci/ixgbe.h 8 Feb 2022 03:38:00 -0000 1.33
+++ dev/pci/ixgbe.h 14 May 2023 09:11:33 -0000
@@ -60,12 +60,18 @@
#include <net/if.h>
#include <net/if_media.h>
+#include <net/route.h>
#include <net/toeplitz.h>
+struct tdb;
+
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
#if NBPFILTER > 0
#include <net/bpf.h>
Index: dev/pci/ixgbe_type.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/ixgbe_type.h,v
retrieving revision 1.36
diff -u -p -r1.36 ixgbe_type.h
--- dev/pci/ixgbe_type.h 9 Jan 2022 05:42:56 -0000 1.36
+++ dev/pci/ixgbe_type.h 14 May 2023 09:11:33 -0000
@@ -3355,6 +3355,7 @@ struct ixgbe_adv_tx_context_desc {
/* 1st&Last TSO-full iSCSI PDU */
#define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800
#define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */
+#define IXGBE_ADVTXD_PAYLEN_MASK 0x0003FFFF /* Adv desc PAYLEN */
#define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
#define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
#define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift
*/
Index: net/if.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.h,v
retrieving revision 1.211
diff -u -p -r1.211 if.h
--- net/if.h 7 Mar 2023 20:09:48 -0000 1.211
+++ net/if.h 14 May 2023 09:11:33 -0000
@@ -231,7 +231,7 @@ struct if_status_description {
#define IFXF_INET6_NOSOII 0x40 /* [N] don't do RFC 7217 */
#define IFXF_AUTOCONF4 0x80 /* [N] v4 autoconf (aka dhcp)
enabled */
#define IFXF_MONITOR 0x100 /* [N] only used for bpf */
-#define IFXF_TSO 0x200 /* [N] TCP segment offloading */
+#define IFXF_LRO 0x200 /* [N] TCP large recv offload */
#define IFXF_CANTCHANGE \
(IFXF_MPSAFE|IFXF_CLONED)
@@ -251,11 +251,17 @@ struct if_status_description {
#define IFCAP_VLAN_HWTAGGING 0x00000020 /* hardware VLAN tag
support */
#define IFCAP_CSUM_TCPv6 0x00000080 /* can do IPv6/TCP
checksums */
#define IFCAP_CSUM_UDPv6 0x00000100 /* can do IPv6/UDP
checksums */
-#define IFCAP_TSO 0x00004000 /* TCP segment
offloading */
+#define IFCAP_LRO 0x00001000 /* TCP large recv
offload */
+#define IFCAP_TSOv4 0x00002000 /* TCP segmentation
offload */
+#define IFCAP_TSOv6 0x00004000 /* TCP segmentation
offload */
#define IFCAP_WOL 0x00008000 /* can do wake on lan */
#define IFCAP_CSUM_MASK (IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | \
IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6)
+
+/* XXX feature flags are misnamed */
+#define IFCAP_TSO IFCAP_LRO
+#define IFXF_TSO IFXF_LRO
/* symbolic names for terminal (per-protocol) CTL_IFQ_ nodes */
#define IFQCTL_LEN 1
Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1179
diff -u -p -r1.1179 pf.c
--- net/pf.c 13 May 2023 13:35:17 -0000 1.1179
+++ net/pf.c 14 May 2023 09:11:33 -0000
@@ -6555,15 +6555,9 @@ pf_route(struct pf_pdesc *pd, struct pf_
goto done;
}
- if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) &&
- m0->m_pkthdr.ph_mss <= ifp->if_mtu) {
- if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) ||
- if_output_ml(ifp, &ml, sintosa(dst), rt))
- goto done;
- tcpstat_inc(tcps_outswtso);
+ if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt,
+ IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL)
goto done;
- }
- CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO);
/*
* Too large for interface; fragment if possible.
@@ -6598,7 +6592,6 @@ void
pf_route6(struct pf_pdesc *pd, struct pf_state *st)
{
struct mbuf *m0;
- struct mbuf_list ml;
struct sockaddr_in6 *dst, sin6;
struct rtentry *rt = NULL;
struct ip6_hdr *ip6;
@@ -6696,15 +6689,9 @@ pf_route6(struct pf_pdesc *pd, struct pf
goto done;
}
- if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) &&
- m0->m_pkthdr.ph_mss <= ifp->if_mtu) {
- if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) ||
- if_output_ml(ifp, &ml, sin6tosa(dst), rt))
- goto done;
- tcpstat_inc(tcps_outswtso);
+ if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt,
+ IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL)
goto done;
- }
- CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO);
ip6stat_inc(ip6s_cantfrag);
if (st->rt != PF_DUPTO)
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.386
diff -u -p -r1.386 ip_output.c
--- netinet/ip_output.c 13 May 2023 13:35:17 -0000 1.386
+++ netinet/ip_output.c 14 May 2023 09:11:33 -0000
@@ -460,15 +460,10 @@ sendit:
goto done;
}
- if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) &&
- m->m_pkthdr.ph_mss <= mtu) {
- if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) ||
- (error = if_output_ml(ifp, &ml, sintosa(dst), ro->ro_rt)))
- goto done;
- tcpstat_inc(tcps_outswtso);
+ error = tcp_if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt,
+ IFCAP_TSOv4, mtu);
+ if (error || m == NULL)
goto done;
- }
- CLR(m->m_pkthdr.csum_flags, M_TCP_TSO);
/*
* Too large for interface; fragment if possible.
@@ -1887,10 +1882,15 @@ in_proto_cksum_out(struct mbuf *m, struc
u_int16_t csum = 0, offset;
offset = ip->ip_hl << 2;
- if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
+ if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ csum = in_cksum_phdr(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htonl(ip->ip_p));
+ } else if (ISSET(m->m_pkthdr.csum_flags,
+ M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) {
csum = in_cksum_phdr(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) -
offset + ip->ip_p));
+ }
if (ip->ip_p == IPPROTO_TCP)
offset += offsetof(struct tcphdr, th_sum);
else if (ip->ip_p == IPPROTO_UDP)
Index: netinet/tcp_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.137
diff -u -p -r1.137 tcp_output.c
--- netinet/tcp_output.c 13 May 2023 13:35:18 -0000 1.137
+++ netinet/tcp_output.c 14 May 2023 09:20:15 -0000
@@ -80,6 +80,7 @@
#include <sys/kernel.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#if NPF > 0
#include <net/pfvar.h>
@@ -753,7 +754,7 @@ send:
/* Enable TSO and specify the size of the resulting segments. */
if (tso) {
- m->m_pkthdr.csum_flags |= M_TCP_TSO;
+ SET(m->m_pkthdr.csum_flags, M_TCP_TSO);
m->m_pkthdr.ph_mss = tp->t_maxseg;
}
@@ -1347,5 +1348,45 @@ tcp_chopper(struct mbuf *m0, struct mbuf
bad:
tcpstat_inc(tcps_outbadtso);
ml_purge(ml);
+ return error;
+}
+
+int
+tcp_if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
+ struct rtentry *rt, uint32_t ifcap, u_int mtu)
+{
+ struct mbuf_list ml;
+ int error;
+
+ /* caller must fail later or fragment */
+ if (!ISSET((*mp)->m_pkthdr.csum_flags, M_TCP_TSO))
+ return 0;
+ if ((*mp)->m_pkthdr.ph_mss > mtu) {
+ CLR((*mp)->m_pkthdr.csum_flags, M_TCP_TSO);
+ return 0;
+ }
+
+ /* network interface hardware will do TSO */
+ if (in_ifcap_cksum(*mp, ifp, ifcap)) {
+ if (ISSET(ifcap, IFCAP_TSOv4)) {
+ in_hdr_cksum_out(*mp, ifp);
+ in_proto_cksum_out(*mp, ifp);
+ }
+ if (ISSET(ifcap, IFCAP_TSOv6))
+ in6_proto_cksum_out(*mp, ifp);
+ if ((error = ifp->if_output(ifp, *mp, dst, rt)))
+ goto done;
+ tcpstat_inc(tcps_outhwtso);
+ goto done;
+ }
+
+ /* as fallback do TSO in software */
+ if ((error = tcp_chopper(*mp, &ml, ifp, (*mp)->m_pkthdr.ph_mss)) ||
+ (error = if_output_ml(ifp, &ml, dst, rt)))
+ goto done;
+ tcpstat_inc(tcps_outswtso);
+
+ done:
+ *mp = NULL;
return error;
}
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.164
diff -u -p -r1.164 tcp_var.h
--- netinet/tcp_var.h 10 May 2023 12:07:16 -0000 1.164
+++ netinet/tcp_var.h 14 May 2023 09:11:33 -0000
@@ -719,6 +719,8 @@ struct tcpcb *
void tcp_notify(struct inpcb *, int);
int tcp_output(struct tcpcb *);
int tcp_chopper(struct mbuf *, struct mbuf_list *, struct ifnet *, u_int);
+int tcp_if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *,
+ struct rtentry *, uint32_t, u_int);
void tcp_pulloutofband(struct socket *, u_int, struct mbuf *, int);
int tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *);
void tcp_rscale(struct tcpcb *, u_long);
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.275
diff -u -p -r1.275 ip6_output.c
--- netinet6/ip6_output.c 10 May 2023 12:07:17 -0000 1.275
+++ netinet6/ip6_output.c 14 May 2023 09:11:33 -0000
@@ -706,15 +706,10 @@ reroute:
goto done;
}
- if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) &&
- m->m_pkthdr.ph_mss <= mtu) {
- if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) ||
- (error = if_output_ml(ifp, &ml, sin6tosa(dst), ro->ro_rt)))
- goto done;
- tcpstat_inc(tcps_outswtso);
+ error = tcp_if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt,
+ IFCAP_TSOv6, mtu);
+ if (error || m == NULL)
goto done;
- }
- CLR(m->m_pkthdr.csum_flags, M_TCP_TSO);
/*
* try to fragment the packet. case 1-b
@@ -2715,8 +2710,13 @@ in6_proto_cksum_out(struct mbuf *m, stru
u_int16_t csum;
offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
- csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst,
- htonl(m->m_pkthdr.len - offset), htonl(nxt));
+ if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst,
+ htonl(0), htonl(nxt));
+ } else {
+ csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst,
+ htonl(m->m_pkthdr.len - offset), htonl(nxt));
+ }
if (nxt == IPPROTO_TCP)
offset += offsetof(struct tcphdr, th_sum);
else if (nxt == IPPROTO_UDP)