On Tue, Oct 11, 2022 at 04:16:15PM +0100, Stuart Henderson wrote:
> On 2022/10/11 15:03, Moritz Buhl wrote:
> > Here is a new diff for checksum offloading (ipv4, udp, tcp) for em(4).
> > 
> > The previous diff didn't implement hardware vlan tagging for >em82578
> > which should result in variable ethernet header lengths and thus
> > wrong checksums inserted at wrong places.
> > 
> > The diff below addresses this.
> > I would appreciate further testing reports with different controllers.
> > 
> > mbuhl
> 
> I tried this on my laptop which has I219-V em (I run it in a trunk
> with iwm). It breaks tx (packets don't show up on the other side).
> rx seems ok.

The following diff will restrict the usage of the advanced 
descriptors to 82575, 82576, i350 and i210, and fix what the
last diff broke for i219.

Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.362
diff -u -p -r1.362 if_em.c
--- dev/pci/if_em.c     23 Jun 2022 09:38:28 -0000      1.362
+++ dev/pci/if_em.c     11 Oct 2022 16:05:43 -0000
@@ -37,6 +37,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <dev/pci/if_em.h>
 #include <dev/pci/if_em_soc.h>
 
+#include <netinet/ip6.h>
+
 /*********************************************************************
  *  Driver version
  *********************************************************************/
@@ -278,6 +280,8 @@ void em_receive_checksum(struct em_softc
                         struct mbuf *);
 u_int  em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
            u_int32_t *, u_int32_t *);
+u_int  em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
+           u_int32_t *);
 void em_iff(struct em_softc *);
 void em_update_link_status(struct em_softc *);
 int  em_get_buf(struct em_queue *, int);
@@ -1220,10 +1224,9 @@ em_encap(struct em_queue *que, struct mb
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
        }
 
-       if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
-           sc->hw.mac_type != em_82576 &&
-           sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
-           sc->hw.mac_type != em_i350) {
+       if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
+               used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
+       } else if (sc->hw.mac_type >= em_82543) {
                used += em_transmit_checksum_setup(que, m, head,
                    &txd_upper, &txd_lower);
        } else {
@@ -1278,7 +1281,7 @@ em_encap(struct em_queue *que, struct mb
 
 #if NVLAN > 0
        /* Find out if we are in VLAN mode */
-       if (m->m_flags & M_VLANTAG) {
+       if (m->m_flags & M_VLANTAG && sc->hw.mac_type < em_82575) {
                /* Set the VLAN id */
                desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
 
@@ -1964,17 +1967,14 @@ em_setup_interface(struct em_softc *sc)
        ifp->if_capabilities = IFCAP_VLAN_MTU;
 
 #if NVLAN > 0
-       if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 &&
-           sc->hw.mac_type != em_82576 &&
-           sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350)
-               ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
+       ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
 #endif
 
-       if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
-           sc->hw.mac_type != em_82576 &&
-           sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
-           sc->hw.mac_type != em_i350)
+       if (sc->hw.mac_type >= em_82543) {
+               ifp->if_capabilities |= IFCAP_CSUM_IPv4;
                ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
+               ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+       }
 
        /* 
         * Specify the media types supported by this adapter and register
@@ -2391,6 +2391,108 @@ em_free_transmit_structures(struct em_so
        }
 }
 
+u_int
+em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head,
+    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
+{
+       struct e1000_adv_tx_context_desc *TD;
+       struct ether_header *eh = mtod(mp, struct ether_header *);
+       struct mbuf *m;
+       uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
+       int off = 0, hoff;
+       uint8_t ipproto, iphlen;
+
+       *olinfo_status = 0;
+       *cmd_type_len = 0;
+       TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
+       
+#if NVLAN > 0
+       if (ISSET(mp->m_flags, M_VLANTAG)) {
+               uint16_t vtag = htole16(mp->m_pkthdr.ether_vtag);
+               vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
+               *cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+               off = 1;
+       }
+#endif
+
+       vlan_macip_lens |= (sizeof(*eh) << E1000_ADVTXD_MACLEN_SHIFT);
+       
+       switch (ntohs(eh->ether_type)) {
+       case ETHERTYPE_IP: {
+               struct ip *ip;
+
+               m = m_getptr(mp, sizeof(*eh), &hoff);
+               ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+
+               iphlen = ip->ip_hl << 2;
+               ipproto = ip->ip_p;
+
+               type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
+               if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
+                       *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+                       off = 1;
+               }
+
+               break;
+       }
+#ifdef INET6
+       case ETHERTYPE_IPV6: {
+               struct ip6_hdr *ip6;
+
+               m = m_getptr(mp, sizeof(*eh), &hoff);
+               ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
+
+               iphlen = sizeof(*ip6);
+               ipproto = ip6->ip6_nxt;
+
+               type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
+               break;
+       }
+#endif
+       default:
+               iphlen = 0;
+               ipproto = 0;
+               break;
+       }
+
+       *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
+       *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT;
+       *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
+       vlan_macip_lens |= iphlen;
+       type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+       switch (ipproto) {
+       case IPPROTO_TCP:
+               type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
+               if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
+                       *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+                       off = 1;
+               }
+               break;
+       case IPPROTO_UDP:
+               type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
+               if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
+                       *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+                       off = 1;
+               }
+               break;
+       }
+
+       if (!off)
+               return 0;
+
+       /* 82575 needs the queue index added */
+       if (que->sc->hw.mac_type == em_82575)
+               mss_l4len_idx |= (que->me & 0xff) << 4;
+
+       htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
+       htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
+       htolem32(&TD->u.seqnum_seed, 0);
+       htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
+
+       return 1;
+}
+
 /*********************************************************************
  *
  *  The offload context needs to be set when we transfer the first
@@ -2403,51 +2505,93 @@ em_transmit_checksum_setup(struct em_que
     u_int32_t *txd_upper, u_int32_t *txd_lower)
 {
        struct em_context_desc *TXD;
+       XSUM_CONTEXT_T off = OFFLOAD_NONE;
+       uint8_t tucss = 0;
+       uint8_t tucso = 0;
+
+       *txd_upper = 0;
+       *txd_lower = 0;
+
+       if (mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) {
+               *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
+               *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+               off = OFFLOAD_IP;
+       }
 
        if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
-               *txd_upper = E1000_TXD_POPTS_TXSM << 8;
-               *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
-               if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
-                       return (0);
-               else
-                       que->tx.active_checksum_context = OFFLOAD_TCP_IP;
+               *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+               *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+
+               if (que->tx.active_checksum_context == OFFLOAD_TCP_IPv6 &&
+                   off != OFFLOAD_IP) {
+                       return 0;
+               } else if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
+                       return 0;
+
+               if (off == OFFLOAD_IP) {
+                       off = OFFLOAD_TCP_IP;
+                       tucss = ETHER_HDR_LEN + sizeof(struct ip);
+                       tucso = tucss + offsetof(struct tcphdr, th_sum);
+               } else {
+                       off = OFFLOAD_TCP_IPv6;
+                       tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr);
+                       tucso = tucss + offsetof(struct tcphdr, th_sum);
+               }
+
+               que->tx.active_checksum_context = off;
        } else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
-               *txd_upper = E1000_TXD_POPTS_TXSM << 8;
-               *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
-               if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
+               *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+               *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+
+               if (que->tx.active_checksum_context == OFFLOAD_UDP_IPv6 &&
+                   off != OFFLOAD_IP) {
+                       return 0;
+               } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
+                       return 0;
+
+               if (off == OFFLOAD_IP) {
+                       off = OFFLOAD_UDP_IP;
+                       tucss = ETHER_HDR_LEN + sizeof(struct ip);
+                       tucso = tucss + offsetof(struct udphdr, uh_sum);
+               } else {
+                       off = OFFLOAD_UDP_IPv6;
+                       tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr);
+                       tucso = tucss + offsetof(struct udphdr, uh_sum);
+               }
+
+               que->tx.active_checksum_context = off;
+       } else if (off == OFFLOAD_IP) {
+               if (que->tx.active_checksum_context == OFFLOAD_IP)
                        return (0);
                else
-                       que->tx.active_checksum_context = OFFLOAD_UDP_IP;
-       } else {
-               *txd_upper = 0;
-               *txd_lower = 0;
-               return (0);
+                       que->tx.active_checksum_context = OFFLOAD_IP;
        }
 
+       if (off == OFFLOAD_NONE)
+               return 0;
+
        /* If we reach this point, the checksum offload context
         * needs to be reset.
         */
+
        TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
 
-       TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
-       TXD->lower_setup.ip_fields.ipcso = 
-           ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
-       TXD->lower_setup.ip_fields.ipcse = 
-           htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
+       if (off == OFFLOAD_IP || off == OFFLOAD_TCP_IP ||
+           off == OFFLOAD_UDP_IP) {
+               TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
+               TXD->lower_setup.ip_fields.ipcso = 
+                   ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
+               TXD->lower_setup.ip_fields.ipcse = 
+                   htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
+       } else {
+               TXD->lower_setup.ip_fields.ipcss = 0;
+               TXD->lower_setup.ip_fields.ipcso = 0;
+               TXD->lower_setup.ip_fields.ipcse = 0;
+       }
 
-       TXD->upper_setup.tcp_fields.tucss = 
-           ETHER_HDR_LEN + sizeof(struct ip);
+       TXD->upper_setup.tcp_fields.tucss = tucss;
+       TXD->upper_setup.tcp_fields.tucso = tucso;
        TXD->upper_setup.tcp_fields.tucse = htole16(0);
-
-       if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
-               TXD->upper_setup.tcp_fields.tucso = 
-                   ETHER_HDR_LEN + sizeof(struct ip) + 
-                   offsetof(struct tcphdr, th_sum);
-       } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
-               TXD->upper_setup.tcp_fields.tucso = 
-                   ETHER_HDR_LEN + sizeof(struct ip) + 
-                   offsetof(struct udphdr, uh_sum);
-       }
 
        TXD->tcp_seg_setup.data = htole32(0);
        TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
Index: dev/pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
retrieving revision 1.80
diff -u -p -r1.80 if_em.h
--- dev/pci/if_em.h     9 Jan 2022 05:42:50 -0000       1.80
+++ dev/pci/if_em.h     11 Oct 2022 12:59:19 -0000
@@ -290,8 +290,11 @@ struct em_dma_alloc {
 
 typedef enum _XSUM_CONTEXT_T {
        OFFLOAD_NONE,
+       OFFLOAD_IP,
        OFFLOAD_TCP_IP,
-       OFFLOAD_UDP_IP
+       OFFLOAD_UDP_IP,
+       OFFLOAD_TCP_IPv6,
+       OFFLOAD_UDP_IPv6
 } XSUM_CONTEXT_T;
 
 /* For 82544 PCI-X Workaround */
Index: dev/pci/if_em_hw.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
retrieving revision 1.87
diff -u -p -r1.87 if_em_hw.h
--- dev/pci/if_em_hw.h  23 Jun 2022 09:38:28 -0000      1.87
+++ dev/pci/if_em_hw.h  11 Oct 2022 12:59:19 -0000
@@ -2123,6 +2123,33 @@ struct em_hw {
 #define E1000_RXCSUM_IPPCSE    0x00001000   /* IP payload checksum enable */
 #define E1000_RXCSUM_PCSD      0x00002000   /* packet checksum disabled */
 
+/* Context descriptors */
+struct e1000_adv_tx_context_desc {
+        uint32_t vlan_macip_lens;
+        union {
+                uint32_t launch_time;
+                uint32_t seqnum_seed;
+        } u;
+        uint32_t type_tucmd_mlhl;
+        uint32_t mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
+#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
+#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
+#define E1000_ADVTXD_DCMD_VLE  0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_PAYLEN_SHIFT      14 /* Adv desc PAYLEN shift */
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_MACLEN_SHIFT      9  /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_VLAN_SHIFT                16  /* Adv ctxt vlan tag shift */
+#define E1000_ADVTXD_TUCMD_IPV4                0x00000400  /* IP Packet Type: 1=IPv4 */
+#define E1000_ADVTXD_TUCMD_IPV6                0x00000000  /* IP Packet Type: 0=IPv6 */
+#define E1000_ADVTXD_TUCMD_L4T_UDP     0x00000000  /* L4 Packet TYPE of UDP */
+#define E1000_ADVTXD_TUCMD_L4T_TCP     0x00000800  /* L4 Packet TYPE of TCP */
+
 /* Multiple Receive Queue Control */
 #define E1000_MRQC_ENABLE_MASK              0x00000003
 #define E1000_MRQC_ENABLE_RSS_2Q            0x00000001

Reply via email to