On Tue, Oct 11, 2022 at 04:16:15PM +0100, Stuart Henderson wrote:
> On 2022/10/11 15:03, Moritz Buhl wrote:
> > Here is a new diff for checksum offloading (ipv4, udp, tcp) for em(4).
> >
> > The previous diff didn't implement hardware vlan tagging for >em82578
> > which should result in variable ethernet header lengths and thus
> > wrong checksums inserted at wrong places.
> >
> > The diff below addresses this.
> > I would appreciate further testing reports with different controllers.
> >
> > mbuhl
>
> I tried this on my laptop which has I219-V em (I run it in a trunk
> with iwm). It breaks tx (packets don't show up on the other side).
> rx seems ok.
The following diff will restrict the usage of the advanced descriptors to 82575, 82576, i350 and i210, and fix what the last diff broke for i219. Index: dev/pci/if_em.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.c,v retrieving revision 1.362 diff -u -p -r1.362 if_em.c --- dev/pci/if_em.c 23 Jun 2022 09:38:28 -0000 1.362 +++ dev/pci/if_em.c 11 Oct 2022 16:05:43 -0000 @@ -37,6 +37,8 @@ POSSIBILITY OF SUCH DAMAGE. #include <dev/pci/if_em.h> #include <dev/pci/if_em_soc.h> +#include <netinet/ip6.h> + /********************************************************************* * Driver version *********************************************************************/ @@ -278,6 +280,8 @@ void em_receive_checksum(struct em_softc struct mbuf *); u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, u_int32_t *); +u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, + u_int32_t *); void em_iff(struct em_softc *); void em_update_link_status(struct em_softc *); int em_get_buf(struct em_queue *, int); @@ -1220,10 +1224,9 @@ em_encap(struct em_queue *que, struct mb BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } - if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 && - sc->hw.mac_type != em_82576 && - sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 && - sc->hw.mac_type != em_i350) { + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { + used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower); + } else if (sc->hw.mac_type >= em_82543) { used += em_transmit_checksum_setup(que, m, head, &txd_upper, &txd_lower); } else { @@ -1278,7 +1281,7 @@ em_encap(struct em_queue *que, struct mb #if NVLAN > 0 /* Find out if we are in VLAN mode */ - if (m->m_flags & M_VLANTAG) { + if (m->m_flags & M_VLANTAG && sc->hw.mac_type < em_82575) { /* Set the VLAN id */ desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag); @@ -1964,17 +1967,14 @@ 
em_setup_interface(struct em_softc *sc) ifp->if_capabilities = IFCAP_VLAN_MTU; #if NVLAN > 0 - if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 && - sc->hw.mac_type != em_82576 && - sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350) - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; #endif - if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 && - sc->hw.mac_type != em_82576 && - sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 && - sc->hw.mac_type != em_i350) + if (sc->hw.mac_type >= em_82543) { + ifp->if_capabilities |= IFCAP_CSUM_IPv4; ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4; + ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; + } /* * Specify the media types supported by this adapter and register @@ -2391,6 +2391,108 @@ em_free_transmit_structures(struct em_so } } +u_int +em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head, + u_int32_t *olinfo_status, u_int32_t *cmd_type_len) +{ + struct e1000_adv_tx_context_desc *TD; + struct ether_header *eh = mtod(mp, struct ether_header *); + struct mbuf *m; + uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; + int off = 0, hoff; + uint8_t ipproto, iphlen; + + *olinfo_status = 0; + *cmd_type_len = 0; + TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head]; + +#if NVLAN > 0 + if (ISSET(mp->m_flags, M_VLANTAG)) { + uint16_t vtag = htole16(mp->m_pkthdr.ether_vtag); + vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT; + *cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + off = 1; + } +#endif + + vlan_macip_lens |= (sizeof(*eh) << E1000_ADVTXD_MACLEN_SHIFT); + + switch (ntohs(eh->ether_type)) { + case ETHERTYPE_IP: { + struct ip *ip; + + m = m_getptr(mp, sizeof(*eh), &hoff); + ip = (struct ip *)(mtod(m, caddr_t) + hoff); + + iphlen = ip->ip_hl << 2; + ipproto = ip->ip_p; + + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) 
{ + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; + off = 1; + } + + break; + } +#ifdef INET6 + case ETHERTYPE_IPV6: { + struct ip6_hdr *ip6; + + m = m_getptr(mp, sizeof(*eh), &hoff); + ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); + + iphlen = sizeof(*ip6); + ipproto = ip6->ip6_nxt; + + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; + break; + } +#endif + default: + iphlen = 0; + ipproto = 0; + break; + } + + *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS; + *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT; + *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; + vlan_macip_lens |= iphlen; + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + switch (ipproto) { + case IPPROTO_TCP: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) { + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + off = 1; + } + break; + case IPPROTO_UDP: + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; + if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) { + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + off = 1; + } + break; + } + + if (!off) + return 0; + + /* 82575 needs the queue index added */ + if (que->sc->hw.mac_type == em_82575) + mss_l4len_idx |= (que->me & 0xff) << 4; + + htolem32(&TD->vlan_macip_lens, vlan_macip_lens); + htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl); + htolem32(&TD->u.seqnum_seed, 0); + htolem32(&TD->mss_l4len_idx, mss_l4len_idx); + + return 1; +} + /********************************************************************* * * The offload context needs to be set when we transfer the first @@ -2403,51 +2505,93 @@ em_transmit_checksum_setup(struct em_que u_int32_t *txd_upper, u_int32_t *txd_lower) { struct em_context_desc *TXD; + XSUM_CONTEXT_T off = OFFLOAD_NONE; + uint8_t tucss = 0; + uint8_t tucso = 0; + + *txd_upper = 0; + *txd_lower = 0; + + if (mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) { + *txd_upper |= E1000_TXD_POPTS_IXSM << 8; + *txd_lower |= E1000_TXD_CMD_DEXT | 
E1000_TXD_DTYP_D; + off = OFFLOAD_IP; + } if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { - *txd_upper = E1000_TXD_POPTS_TXSM << 8; - *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; - if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) - return (0); - else - que->tx.active_checksum_context = OFFLOAD_TCP_IP; + *txd_upper |= E1000_TXD_POPTS_TXSM << 8; + *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; + + if (que->tx.active_checksum_context == OFFLOAD_TCP_IPv6 && + off != OFFLOAD_IP) { + return 0; + } else if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) + return 0; + + if (off == OFFLOAD_IP) { + off = OFFLOAD_TCP_IP; + tucss = ETHER_HDR_LEN + sizeof(struct ip); + tucso = tucss + offsetof(struct tcphdr, th_sum); + } else { + off = OFFLOAD_TCP_IPv6; + tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr); + tucso = tucss + offsetof(struct tcphdr, th_sum); + } + + que->tx.active_checksum_context = off; } else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { - *txd_upper = E1000_TXD_POPTS_TXSM << 8; - *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; - if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) + *txd_upper |= E1000_TXD_POPTS_TXSM << 8; + *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; + + if (que->tx.active_checksum_context == OFFLOAD_UDP_IPv6 && + off != OFFLOAD_IP) { + return 0; + } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) + return 0; + + if (off == OFFLOAD_IP) { + off = OFFLOAD_UDP_IP; + tucss = ETHER_HDR_LEN + sizeof(struct ip); + tucso = tucss + offsetof(struct udphdr, uh_sum); + } else { + off = OFFLOAD_UDP_IPv6; + tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr); + tucso = tucss + offsetof(struct udphdr, uh_sum); + } + + que->tx.active_checksum_context = off; + } else if (off == OFFLOAD_IP) { + if (que->tx.active_checksum_context == OFFLOAD_IP) return (0); else - que->tx.active_checksum_context = OFFLOAD_UDP_IP; - } else { - *txd_upper = 0; - *txd_lower = 0; - return (0); + que->tx.active_checksum_context = 
OFFLOAD_IP; } + if (off == OFFLOAD_NONE) + return 0; + /* If we reach this point, the checksum offload context * needs to be reset. */ + TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head]; - TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN; - TXD->lower_setup.ip_fields.ipcso = - ETHER_HDR_LEN + offsetof(struct ip, ip_sum); - TXD->lower_setup.ip_fields.ipcse = - htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1); + if (off == OFFLOAD_IP || off == OFFLOAD_TCP_IP || + off == OFFLOAD_UDP_IP) { + TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN; + TXD->lower_setup.ip_fields.ipcso = + ETHER_HDR_LEN + offsetof(struct ip, ip_sum); + TXD->lower_setup.ip_fields.ipcse = + htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1); + } else { + TXD->lower_setup.ip_fields.ipcss = 0; + TXD->lower_setup.ip_fields.ipcso = 0; + TXD->lower_setup.ip_fields.ipcse = 0; + } - TXD->upper_setup.tcp_fields.tucss = - ETHER_HDR_LEN + sizeof(struct ip); + TXD->upper_setup.tcp_fields.tucss = tucss; + TXD->upper_setup.tcp_fields.tucso = tucso; TXD->upper_setup.tcp_fields.tucse = htole16(0); - - if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) { - TXD->upper_setup.tcp_fields.tucso = - ETHER_HDR_LEN + sizeof(struct ip) + - offsetof(struct tcphdr, th_sum); - } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) { - TXD->upper_setup.tcp_fields.tucso = - ETHER_HDR_LEN + sizeof(struct ip) + - offsetof(struct udphdr, uh_sum); - } TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT); Index: dev/pci/if_em.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.h,v retrieving revision 1.80 diff -u -p -r1.80 if_em.h --- dev/pci/if_em.h 9 Jan 2022 05:42:50 -0000 1.80 +++ dev/pci/if_em.h 11 Oct 2022 12:59:19 -0000 @@ -290,8 +290,11 @@ struct em_dma_alloc { typedef enum _XSUM_CONTEXT_T { OFFLOAD_NONE, + OFFLOAD_IP, OFFLOAD_TCP_IP, - OFFLOAD_UDP_IP + OFFLOAD_UDP_IP, + OFFLOAD_TCP_IPv6, + 
OFFLOAD_UDP_IPv6 } XSUM_CONTEXT_T; /* For 82544 PCI-X Workaround */ Index: dev/pci/if_em_hw.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v retrieving revision 1.87 diff -u -p -r1.87 if_em_hw.h --- dev/pci/if_em_hw.h 23 Jun 2022 09:38:28 -0000 1.87 +++ dev/pci/if_em_hw.h 11 Oct 2022 12:59:19 -0000 @@ -2123,6 +2123,33 @@ struct em_hw { #define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ #define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ +/* Context descriptors */ +struct e1000_adv_tx_context_desc { + uint32_t vlan_macip_lens; + union { + uint32_t launch_time; + uint32_t seqnum_seed; + } u; + uint32_t type_tucmd_mlhl; + uint32_t mss_l4len_idx; +}; + +/* Adv Transmit Descriptor Config Masks */ +#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ +#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ +#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ +#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ +#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ + +/* Adv Transmit Descriptor Config Masks */ +#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ +#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ +#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ +#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ +#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ + /* Multiple Receive Queue Control */ #define E1000_MRQC_ENABLE_MASK 0x00000003 #define E1000_MRQC_ENABLE_RSS_2Q 0x00000001