On Sun, Jan 28, 2024 at 07:46:29PM +0100, Marcus Glocker wrote:
> Anyway, the TSO support just has been backed out.  Thanks again for all
> your testing!

I am still interested in getting em working with TSO if possible.  Most
use cases work fine.  If there is a bug in our driver, we may fix
it.  If it is a hardware bug, we should identify the broken chip
revisions.

Here is the backed out em TSO diff together with the TCP header
diff for sparc64.

Kurt, could you still test this in your next sparc64 build?

bluhm

Index: dev/pci/if_em.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em.c,v
diff -u -p -r1.371 if_em.c
--- dev/pci/if_em.c     28 Jan 2024 18:42:58 -0000      1.371
+++ dev/pci/if_em.c     29 Jan 2024 14:37:36 -0000
@@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc
                         struct mbuf *);
 u_int  em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
            u_int32_t *, u_int32_t *);
+u_int  em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
+           u_int32_t *);
 u_int  em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
            u_int32_t *);
 void em_iff(struct em_softc *);
@@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc)
  *
  *  This routine maps the mbufs to tx descriptors.
  *
- *  return 0 on success, positive on failure
+ *  return 0 on failure, positive on success
  **********************************************************************/
 u_int
 em_encap(struct em_queue *que, struct mbuf *m)
@@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb
        }
 
        if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
-               used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
+               if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+                       used += em_tso_setup(que, m, head, &txd_upper,
+                           &txd_lower);
+                       if (!used)
+                               return (used);
+               } else {
+                       used += em_tx_ctx_setup(que, m, head, &txd_upper,
+                           &txd_lower);
+               }
        } else if (sc->hw.mac_type >= em_82543) {
                used += em_transmit_checksum_setup(que, m, head,
                    &txd_upper, &txd_lower);
@@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s
                ifp->if_link_state = link_state;
                if_link_state_change(ifp);
        }
+
+       /* Disable TSO for 10/100 speeds to avoid some hardware issues */
+       switch (sc->link_speed) {
+       case SPEED_10:
+       case SPEED_100:
+               if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
+                       ifp->if_capabilities &= ~IFCAP_TSOv4;
+                       ifp->if_capabilities &= ~IFCAP_TSOv6;
+               }
+               break;
+       case SPEED_1000:
+               if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210)
+                       ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+               break;
+       }
 }
 
 /*********************************************************************
@@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc)
        if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
                ifp->if_capabilities |= IFCAP_CSUM_IPv4;
                ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+               ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
        }
 
        /* 
@@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s
 
                for (i = 0; i < sc->sc_tx_slots; i++) {
                        pkt = &que->tx.sc_tx_pkts_ring[i];
-                       error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
+                       error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE,
                            EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
-                           MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
+                           EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
                        if (error != 0) {
                                printf("%s: Unable to create TX DMA map\n",
                                    DEVNAME(sc));
@@ -2403,6 +2429,81 @@ em_free_transmit_structures(struct em_so
                    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
        }
+}
+
+u_int
+em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
+    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
+{
+       struct ether_extracted ext;
+       struct e1000_adv_tx_context_desc *TD;
+       uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
+       uint32_t paylen = 0;
+       uint8_t iphlen = 0;
+
+       *olinfo_status = 0;
+       *cmd_type_len = 0;
+       TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
+
+#if NVLAN > 0
+       if (ISSET(mp->m_flags, M_VLANTAG)) {
+               uint32_t vtag = mp->m_pkthdr.ether_vtag;
+               vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
+               *cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+       }
+#endif
+
+       ether_extract_headers(mp, &ext);
+       if (ext.tcp == NULL)
+               goto out;
+
+       vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
+
+       if (ext.ip4) {
+               iphlen = ext.ip4->ip_hl << 2;
+
+               type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
+               *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+#ifdef INET6
+       } else if (ext.ip6) {
+               iphlen = sizeof(*ext.ip6);
+
+               type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
+#endif
+       } else {
+               goto out;
+       }
+
+       *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
+       *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
+       paylen = mp->m_pkthdr.len - sizeof(*ext.eh) - iphlen -
+           (ext.tcp->th_off << 2);
+       *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
+       vlan_macip_lens |= iphlen;
+       type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+       type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
+       *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+
+       mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
+       mss_l4len_idx |= (ext.tcp->th_off << 2) << E1000_ADVTXD_L4LEN_SHIFT;
+       /* 82575 needs the queue index added */
+       if (que->sc->hw.mac_type == em_82575)
+               mss_l4len_idx |= (que->me & 0xff) << 4;
+
+       htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
+       htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
+       htolem32(&TD->u.seqnum_seed, 0);
+       htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
+
+       tcpstat_add(tcps_outpkttso, (paylen + mp->m_pkthdr.ph_mss - 1) /
+           mp->m_pkthdr.ph_mss);
+
+       return 1;
+
+out:
+       tcpstat_inc(tcps_outbadtso);
+       return 0;
 }
 
 u_int
Index: dev/pci/if_em.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em.h,v
diff -u -p -r1.82 if_em.h
--- dev/pci/if_em.h     28 Jan 2024 18:42:58 -0000      1.82
+++ dev/pci/if_em.h     29 Jan 2024 14:37:36 -0000
@@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include <net/if.h>
 #include <net/if_media.h>
+#include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/if_ether.h>
 #include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 
 #if NBPFILTER > 0
@@ -269,6 +272,7 @@ typedef int boolean_t;
 
 #define EM_MAX_SCATTER         64
 #define EM_TSO_SIZE            65535
+#define EM_TSO_SEG_SIZE                4096    /* Max dma segment size */
 
 struct em_packet {
        int              pkt_eop;       /* Index of the desc to watch */
Index: dev/pci/if_em_hw.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em_hw.h,v
diff -u -p -r1.92 if_em_hw.h
--- dev/pci/if_em_hw.h  28 Jan 2024 18:42:58 -0000      1.92
+++ dev/pci/if_em_hw.h  29 Jan 2024 14:37:36 -0000
@@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc {
 #define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
 #define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
 #define E1000_ADVTXD_DCMD_VLE  0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE  0x80000000 /* TCP Seg enable */
 #define E1000_ADVTXD_PAYLEN_SHIFT      14 /* Adv desc PAYLEN shift */
 
 /* Adv Transmit Descriptor Config Masks */
@@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc {
 #define E1000_ADVTXD_TUCMD_IPV6                0x00000000  /* IP Packet Type: 0=IPv6 */
 #define E1000_ADVTXD_TUCMD_L4T_UDP     0x00000000  /* L4 Packet TYPE of UDP */
 #define E1000_ADVTXD_TUCMD_L4T_TCP     0x00000800  /* L4 Packet TYPE of TCP */
+
+/* Req requires Markers and CRC */
+#define E1000_ADVTXD_L4LEN_SHIFT       8  /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT         16 /* Adv ctxt MSS shift */
 
 /* Multiple Receive Queue Control */
 #define E1000_MRQC_ENABLE_MASK              0x00000003
Index: netinet/tcp.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp.h,v
diff -u -p -r1.24 tcp.h
--- netinet/tcp.h       19 May 2023 01:04:39 -0000      1.24
+++ netinet/tcp.h       29 Jan 2024 14:37:21 -0000
@@ -51,11 +51,11 @@ struct tcphdr {
        tcp_seq   th_seq;               /* sequence number */
        tcp_seq   th_ack;               /* acknowledgement number */
 #if _BYTE_ORDER == _LITTLE_ENDIAN
-       u_int32_t th_x2:4,              /* (unused) */
+       u_int8_t  th_x2:4,              /* (unused) */
                  th_off:4;             /* data offset */
 #endif
 #if _BYTE_ORDER == _BIG_ENDIAN
-       u_int32_t th_off:4,             /* data offset */
+       u_int8_t  th_off:4,             /* data offset */
                  th_x2:4;              /* (unused) */
 #endif
        u_int8_t  th_flags;

Reply via email to