Re: IPv4 on ix(4) slow/nothing - 7.4
On Wed, Oct 18, 2023 at 08:53:44PM +0200, Alexander Bluhm wrote: > On Wed, Oct 18, 2023 at 08:19:29PM +0200, Mischa wrote: > > It's indeed something like that: ix -> vlan (tagged) -> veb > > When vlan is added to veb, kernel should disable LRO on ix. > All testing before release did not find this code path :-( > > Is it possible to add vlan to veb first, and then add or change the > vlan parent to ix? If it works, that should also disable LRO. > > Jan said he will have a look tomorrow. > > trunk, carp, ... in veb or bridge might have the same issue. First round of fixes for vlan(4), vxlan(4), nvgre(4) and bpe(4). ok? Index: net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.708 diff -u -p -r1.708 if.c --- net/if.c16 Sep 2023 09:33:27 - 1.708 +++ net/if.c19 Oct 2023 13:03:33 - @@ -3243,6 +3243,17 @@ ifsetlro(struct ifnet *ifp, int on) struct ifreq ifrq; int error = 0; int s = splnet(); + struct if_parent parent; + + memset(&parent, 0, sizeof(parent)); + if ((*ifp->if_ioctl)(ifp, SIOCGIFPARENT, (caddr_t)&parent) != -1) { + struct ifnet *ifp0 = if_unit(parent.ifp_parent); + + if (ifp0 != NULL) { + ifsetlro(ifp0, on); + if_put(ifp0); + } + } if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) { error = ENOTSUP; Index: net/if_bpe.c === RCS file: /cvs/src/sys/net/if_bpe.c,v retrieving revision 1.19 diff -u -p -r1.19 if_bpe.c --- net/if_bpe.c8 Nov 2021 04:54:44 - 1.19 +++ net/if_bpe.c19 Oct 2023 13:20:18 - @@ -631,6 +631,9 @@ bpe_set_parent(struct bpe_softc *sc, con goto put; } + if (ether_brport_isset(ifp)) + ifsetlro(ifp0, 0); + /* commit */ sc->sc_key.k_if = ifp0->if_index; etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); Index: net/if_gre.c === RCS file: /cvs/src/sys/net/if_gre.c,v retrieving revision 1.174 diff -u -p -r1.174 if_gre.c --- net/if_gre.c13 May 2023 13:35:17 - 1.174 +++ net/if_gre.c19 Oct 2023 13:24:56 - @@ -3544,6 +3544,9 @@ nvgre_set_parent(struct nvgre_softc *sc, return (EPROTONOSUPPORT); } + if (ether_brport_isset(&sc->sc_ac.ac_if)) + 
ifsetlro(ifp0, 0); + /* commit */ sc->sc_ifp0 = ifp0->if_index; if_put(ifp0); Index: net/if_vlan.c === RCS file: /cvs/src/sys/net/if_vlan.c,v retrieving revision 1.215 diff -u -p -r1.215 if_vlan.c --- net/if_vlan.c 16 May 2023 14:32:54 - 1.215 +++ net/if_vlan.c 19 Oct 2023 11:08:23 - @@ -937,6 +937,9 @@ vlan_set_parent(struct vlan_softc *sc, c if (error != 0) goto put; + if (ether_brport_isset(ifp)) + ifsetlro(ifp0, 0); + /* commit */ sc->sc_ifidx0 = ifp0->if_index; if (!ISSET(sc->sc_flags, IFVF_LLADDR)) Index: net/if_vxlan.c === RCS file: /cvs/src/sys/net/if_vxlan.c,v retrieving revision 1.93 diff -u -p -r1.93 if_vxlan.c --- net/if_vxlan.c 3 Aug 2023 09:49:08 - 1.93 +++ net/if_vxlan.c 19 Oct 2023 13:18:47 - @@ -1582,6 +1582,9 @@ vxlan_set_parent(struct vxlan_softc *sc, goto put; } + if (ether_brport_isset(ifp)) + ifsetlro(ifp0, 0); + /* commit */ sc->sc_if_index0 = ifp0->if_index; etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
TSO for ixl(4)
Hi, This diff implements TCP Segmentation Offloading for ixl(4). I tested it successfully on amd64 and sparc64 with Intel X710. It should increase the TCP bulk performance to 10 Gbit/s. On sparc64 I got an increase from 600 MBit/s to 2.000 Gbit/s. Further testing is welcome. bye, Jan Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.89 diff -u -p -r1.89 if_ixl.c --- dev/pci/if_ixl.c29 Sep 2023 19:44:47 - 1.89 +++ dev/pci/if_ixl.c18 Oct 2023 15:15:30 - @@ -71,6 +71,7 @@ #include #include #include +#include #include #if NBPFILTER > 0 @@ -85,6 +86,8 @@ #include #include #include +#include +#include #include #include @@ -827,6 +830,10 @@ struct ixl_tx_desc { #define IXL_TX_DESC_BSIZE_MASK \ (IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT) +#define IXL_TX_CTX_DESC_CMD_TSO0x10 +#define IXL_TX_CTX_DESC_TLEN_SHIFT 30 +#define IXL_TX_CTX_DESC_MSS_SHIFT 50 + #define IXL_TX_DESC_L2TAG1_SHIFT 48 } __packed __aligned(16); @@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 { uint64_tqword3; } __packed __aligned(16); -#define IXL_TX_PKT_DESCS 8 +#define IXL_TX_PKT_DESCS 32 #define IXL_TX_QUEUE_ALIGN 128 #define IXL_RX_QUEUE_ALIGN 128 #define IXL_HARDMTU9712 /* 9726 - ETHER_HDR_LEN */ +#define IXL_TSO_SIZE ((255 * 1024) - 1) +#define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1) + +/* + * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES. + * This interface could not handle packets greater than IXL_TSO_SIZE. 
+ */ +CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE); #define IXL_PCIREG PCI_MAPREG_START @@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status); @@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi txm = &maps[i]; if (bus_dmamap_create(sc->sc_dmat, - IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0, + MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, &txm->txm_map) != 0) goto uncreate; @@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm } static uint64_t -ixl_tx_setup_offload(struct mbuf *m0) +ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr, +unsigned int prod) { struct ether_extracted ext; uint64_t hlen; @@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0) } if (!ISSET(m0->m_pkthdr.csum_flags, - M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) + M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO)) return (offload); ether_extract_headers(m0, &ext); @@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0) offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT; } + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) { + if (ext.tcp) { + struct ixl_tx_desc *ring, *txd; + uint64_t cmd = 0; + + hlen += ext.tcp->th_off << 2; + ring = IXL_DMA_KVA(&txr->txr_mem); + txd = &ring[prod]; + + cmd |= IXL_TX_DESC_DTYPE_CONTEXT; + cmd |= IXL_TX_CTX_DESC_CMD_TSO; + cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN + - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT; + cmd |= (uint64_t)(m0->m_pkthdr.ph_mss) + << IXL_TX_CTX_DESC_MSS_SHIFT; + + htolem64(&txd->addr, 0); + htolem64(&txd->cmd, cmd); + } else + tcpstat_inc(tcps_outbadtso); + } + return (offload); } @@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq) mask = sc->sc_tx_ring_ndescs - 1; for (;;) { - 
if (free <= IXL_TX_PKT_DESCS) { + /* We need one extra descriptor for TSO packets. */ + if (free <= (IXL_TX_PKT_DESCS + 1)) { ifq_set_oactive(ifq); break; } @@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq) if (m == NULL) break; - offload = ixl_tx_setup_offload(m); + offload = ixl_tx_setup_offload(m, txr, prod); txm = &t
fix vlan handling with tcplro on ix(4)
Hi, I missed the vlan-tag size in the mss calculation of lro packets in ix(4). This diff add vlan-header detection in ether_extract_headers() and uses this information to calculate the right mss. This fixes forwarding of vlan tagged lro packets. ok? bye, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.200 diff -u -p -r1.200 if_ix.c --- dev/pci/if_ix.c 18 Jul 2023 16:01:20 - 1.200 +++ dev/pci/if_ix.c 26 Jul 2023 09:21:15 - @@ -3275,6 +3275,10 @@ ixgbe_rxeof(struct rx_ring *rxr) /* Calculate header size. */ ether_extract_headers(sendmp, &ext); hdrlen = sizeof(*ext.eh); +#if NVLAN > 0 + if (ext.evh) + hdrlen += ETHER_VLAN_ENCAP_LEN; +#endif if (ext.ip4) hdrlen += ext.ip4->ip_hl << 2; if (ext.ip6) Index: net/if_ethersubr.c === RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.290 diff -u -p -r1.290 if_ethersubr.c --- net/if_ethersubr.c 6 Jul 2023 19:46:53 - 1.290 +++ net/if_ethersubr.c 26 Jul 2023 09:20:57 - @@ -1040,6 +1040,7 @@ ether_extract_headers(struct mbuf *mp, s uint64_t hlen; int hoff; uint8_t ipproto; + uint16_t ether_type; /* Return NULL if header was not recognized. 
*/ memset(ext, 0, sizeof(*ext)); @@ -1048,9 +1049,20 @@ ether_extract_headers(struct mbuf *mp, s return; ext->eh = mtod(mp, struct ether_header *); - switch (ntohs(ext->eh->ether_type)) { + ether_type = ntohs(ext->eh->ether_type); + hlen = sizeof(*ext->eh); + +#if NVLAN > 0 + if (ether_type == ETHERTYPE_VLAN) { + ext->evh = mtod(mp, struct ether_vlan_header *); + ether_type = ntohs(ext->evh->evl_proto); + hlen = sizeof(*ext->evh); + } +#endif + + switch (ether_type) { case ETHERTYPE_IP: - m = m_getptr(mp, sizeof(*ext->eh), &hoff); + m = m_getptr(mp, hlen, &hoff); if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4)) return; ext->ip4 = (struct ip *)(mtod(m, caddr_t) + hoff); @@ -1064,7 +1076,7 @@ ether_extract_headers(struct mbuf *mp, s break; #ifdef INET6 case ETHERTYPE_IPV6: - m = m_getptr(mp, sizeof(*ext->eh), &hoff); + m = m_getptr(mp, hlen, &hoff); if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6)) return; ext->ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); Index: netinet/if_ether.h === RCS file: /cvs/src/sys/netinet/if_ether.h,v retrieving revision 1.89 diff -u -p -r1.89 if_ether.h --- netinet/if_ether.h 6 Jul 2023 19:46:53 - 1.89 +++ netinet/if_ether.h 26 Jul 2023 09:20:22 - @@ -301,11 +301,12 @@ uint64_t ether_addr_to_e64(const struct void ether_e64_to_addr(struct ether_addr *, uint64_t); struct ether_extracted { - struct ether_header *eh; - struct ip *ip4; - struct ip6_hdr *ip6; - struct tcphdr *tcp; - struct udphdr *udp; + struct ether_header *eh; + struct ether_vlan_header*evh; + struct ip *ip4; + struct ip6_hdr *ip6; + struct tcphdr *tcp; + struct udphdr *udp; }; void ether_extract_headers(struct mbuf *, struct ether_extracted *);
ixl(4): protect admin queue with mutex
Hi, there is an issue with the admin queue of ixl(4) which leads to the following panic when the link state changes: uvm_fault(0x818005f8, 0x18, 0, 2) -> e kernel: page fault trap, code=0 Stopped at ixl_intr0+0xca: movq%rdx,0x18(%rax) TIDPIDUID PRFLAGS PFLAGS CPU COMMAND 392823 13219 00x100040 02 ifstated 444681 94950 90 0x1100010 06 ospf6d 428704 9496 90 0x1100010 09 ospf6d 106020 59273 85 0x1100010 01 ospfd 420435 72114 85 0x1100010 05 ospfd 295821 93368 73 0x1100010 03 syslogd 367116 56598 0 0x14000 0x2007 zerothread 275385 57815 0 0x14000 0x2004 softnet ixl_intr0(84509000) at ixl_intr0+0xca intr_handler(0,844b0b80) at intr_handler+0x5b Xintr_ioapic_edge25_untramp() at Xintr_ioapic_edge25_untramp+0x18f acpicpu_idle() at acpicpu_idle+0x1f6 sched_idle(0) at sched_idle+0x280 end trace frame: 0x0, count: 10 https://www.openbsd.org/ddb.html describes the minimum info required in bug reports. Insufficient info makes it difficult to find and fix bugs. ddb{0}> The queue is corrupted in such a way that slot->iaq_cookie is 0, which causes the uvm fault when iatq is dereferenced. The following diff uses a mutex to protect the admin queue and avoids the issue above. ok? 
bye, Jan Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.87 diff -u -p -r1.87 if_ixl.c --- dev/pci/if_ixl.c6 Feb 2023 20:27:45 - 1.87 +++ dev/pci/if_ixl.c19 Jul 2023 07:05:40 - @@ -1274,6 +1274,7 @@ struct ixl_softc { unsigned int sc_atq_prod; unsigned int sc_atq_cons; + struct mutex sc_atq_mtx; struct ixl_dmamemsc_arq; struct task sc_arq_task; struct ixl_aq_bufs sc_arq_idle; @@ -1723,6 +1724,8 @@ ixl_attach(struct device *parent, struct /* initialise the adminq */ + mtx_init(&sc->sc_atq_mtx, IPL_NET); + if (ixl_dmamem_alloc(sc, &sc->sc_atq, sizeof(struct ixl_aq_desc) * IXL_AQ_NUM, IXL_AQ_ALIGN) != 0) { printf("\n" "%s: unable to allocate atq\n", DEVNAME(sc)); @@ -3599,6 +3602,8 @@ ixl_atq_post(struct ixl_softc *sc, struc struct ixl_aq_desc *atq, *slot; unsigned int prod; + mtx_enter(&sc->sc_atq_mtx); + /* assert locked */ atq = IXL_DMA_KVA(&sc->sc_atq); @@ -3618,6 +3623,8 @@ ixl_atq_post(struct ixl_softc *sc, struc prod &= IXL_AQ_MASK; sc->sc_atq_prod = prod; ixl_wr(sc, sc->sc_aq_regs->atq_tail, prod); + + mtx_leave(&sc->sc_atq_mtx); } static void @@ -3628,11 +3635,15 @@ ixl_atq_done(struct ixl_softc *sc) unsigned int cons; unsigned int prod; + mtx_enter(&sc->sc_atq_mtx); + prod = sc->sc_atq_prod; cons = sc->sc_atq_cons; - if (prod == cons) + if (prod == cons) { + mtx_leave(&sc->sc_atq_mtx); return; + } atq = IXL_DMA_KVA(&sc->sc_atq); @@ -3645,6 +3656,7 @@ ixl_atq_done(struct ixl_softc *sc) if (!ISSET(slot->iaq_flags, htole16(IXL_AQ_DD))) break; + KASSERT(slot->iaq_cookie != 0); iatq = (struct ixl_atq *)slot->iaq_cookie; iatq->iatq_desc = *slot; @@ -3661,6 +3673,8 @@ ixl_atq_done(struct ixl_softc *sc) BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); sc->sc_atq_cons = cons; + + mtx_leave(&sc->sc_atq_mtx); } static void @@ -3691,6 +3705,8 @@ ixl_atq_poll(struct ixl_softc *sc, struc unsigned int prod; unsigned int t = 0; + mtx_enter(&sc->sc_atq_mtx); + atq = IXL_DMA_KVA(&sc->sc_atq); prod = sc->sc_atq_prod; slot = atq + prod; @@ 
-3712,8 +3728,10 @@ ixl_atq_poll(struct ixl_softc *sc, struc while (ixl_rd(sc, sc->sc_aq_regs->atq_head) != prod) { delaymsec(1); - if (t++ > tm) + if (t++ > tm) { + mtx_leave(&sc->sc_atq_mtx); return (ETIMEDOUT); + } } bus_dmamap_sync(sc->sc_dmat, IXL_DMA_MAP(&sc->sc_atq), @@ -3724,6 +3742,7 @@ ixl_atq_poll(struct ixl_softc *sc, struc sc->sc_atq_cons = prod; + mtx_leave(&sc->sc_atq_mtx); return (0); }
Re: tcp lro by default, call for testing
On Sat, Jul 08, 2023 at 05:15:26PM +0300, Alexander Bluhm wrote: > I am not aware of any more limitations when enabling LRO for TCP > in the network drivers. The feature allows to receive aggregated > packets larger than the MTU. Receiving TCP streams becomes much > faster. > > As the network hardware is not aware whether a packet is received > locally or forwarded, everything is aggregated. In case of forwarding > it is split on output to packets not larger than the original > packets. So path MTU discovery should still work. If the outgoing > interface supports TSO, the packet is chopped in hardware. > > Currently only ix(4) and lo(4) support LRO, and ix(4) is limited > to IPv4 and newer than the old 82598 model. If the interface is > added to a bridge(4) or aggr(4), LRO is automatically disabled. I guess you mean veb(4), not aggr(4). We just avoid the inheritance of the LRO capability in aggr(4) but are using the feature. > So in case you possess any ix(4) hardware or do funky pf routing > on lo(4) please run this diff. If you encounter problems, report > and turn LRO off per interface with ifconfig -tcplro. Diff looks fine to me. I would just keep mentioning the default behavior in the manpage like this: ok jan@ Index: sbin/ifconfig/ifconfig.8 === RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v retrieving revision 1.397 diff -u -p -r1.397 ifconfig.8 --- sbin/ifconfig/ifconfig.87 Jun 2023 18:42:40 - 1.397 +++ sbin/ifconfig/ifconfig.810 Jul 2023 11:54:47 - @@ -517,9 +517,9 @@ It is not possible to use LRO with inter or .Xr tpmr 4 . Changing this option will re-initialize the network interface. +LRO is enabled by default. .It Cm -tcplro Disable LRO. -LRO is disabled by default. .It Cm up Mark an interface .Dq up . 
> Index: sys/dev/pci/if_ix.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v > retrieving revision 1.198 > diff -u -p -r1.198 if_ix.c > --- sys/dev/pci/if_ix.c 8 Jul 2023 09:01:30 - 1.198 > +++ sys/dev/pci/if_ix.c 8 Jul 2023 13:51:26 - > @@ -1925,8 +1925,10 @@ ixgbe_setup_interface(struct ix_softc *s > ifp->if_capabilities |= IFCAP_CSUM_IPv4; > > ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; > - if (sc->hw.mac.type != ixgbe_mac_82598EB) > + if (sc->hw.mac.type != ixgbe_mac_82598EB) { > + ifp->if_xflags |= IFXF_LRO; > ifp->if_capabilities |= IFCAP_LRO; > + } > > /* >* Specify the media types supported by this sc and register > Index: sys/net/if_loop.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v > retrieving revision 1.95 > diff -u -p -r1.95 if_loop.c > --- sys/net/if_loop.c 2 Jul 2023 19:59:15 - 1.95 > +++ sys/net/if_loop.c 8 Jul 2023 13:51:26 - > @@ -172,11 +172,11 @@ loop_clone_create(struct if_clone *ifc, > ifp->if_softc = NULL; > ifp->if_mtu = LOMTU; > ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST; > - ifp->if_xflags = IFXF_CLONED; > + ifp->if_xflags = IFXF_CLONED | IFXF_LRO; > ifp->if_capabilities = IFCAP_CSUM_IPv4 | > IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | > IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 | > - IFCAP_LRO; > + IFCAP_LRO | IFCAP_TSOv4 | IFCAP_TSOv6; > ifp->if_rtrequest = lortrequest; > ifp->if_ioctl = loioctl; > ifp->if_input = loinput; > Index: sbin/ifconfig/ifconfig.8 > === > RCS file: /data/mirror/openbsd/cvs/src/sbin/ifconfig/ifconfig.8,v > retrieving revision 1.397 > diff -u -p -r1.397 ifconfig.8 > --- sbin/ifconfig/ifconfig.8 7 Jun 2023 18:42:40 - 1.397 > +++ sbin/ifconfig/ifconfig.8 7 Jul 2023 19:57:09 - > @@ -519,7 +519,6 @@ or > Changing this option will re-initialize the network interface. > .It Cm -tcplro > Disable LRO. > -LRO is disabled by default. > .It Cm up > Mark an interface > .Dq up . >
Re: tcp lro tso path mtu
On Thu, Jul 06, 2023 at 10:19:21PM +0300, Alexander Bluhm wrote: > On Thu, Jul 06, 2023 at 08:49:03PM +0200, Jan Klemkow wrote: > > > @@ -109,6 +109,9 @@ > > > #include > > > #include > > > #include > > > > I think is a merge bug, isn't it? > > > > > +#include > > > +#include > > > +#include > > Right. > > > > + error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu); > > > + if (error || *mp == NULL) > > > + return error; > > > + > > > + if ((*mp)->m_pkthdr.len <= mtu) { > > > > I may miss something but... > > > > Couldn't you move the *_cksum_out() calls above the upper > > tcp_if_output_tso() call? And than remove the *_cksum_out() calls > > inside of tcp_if_output_tso()? > > > > Thus, there is just one place where we call them. > > > > > + switch (dst->sa_family) { > > > + case AF_INET: > > > + in_hdr_cksum_out(*mp, ifp); > > > + in_proto_cksum_out(*mp, ifp); > > > + break; > > > +#ifdef INET6 > > > + case AF_INET6: > > > + in6_proto_cksum_out(*mp, ifp); > > > + break; > > > +#endif > > There is the case in tcp_if_output_tso() where we call tcp_chopper(). > Then checksum has to be calcualted after chopping. If I do it > always before tcp_if_output_tso(), we may caluclate it twice. Once > for the large packet and once for the small ones. > > New diff without duplicate includes. tested with v4/v6, direct and forwarding. 
ok jan@ > Index: net/if.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v > retrieving revision 1.704 > diff -u -p -r1.704 if.c > --- net/if.c 6 Jul 2023 04:55:04 - 1.704 > +++ net/if.c 6 Jul 2023 19:15:00 - > @@ -886,6 +886,57 @@ if_output_ml(struct ifnet *ifp, struct m > } > > int > +if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, > +struct rtentry *rt, u_int mtu) > +{ > + uint32_t ifcap; > + int error; > + > + switch (dst->sa_family) { > + case AF_INET: > + ifcap = IFCAP_TSOv4; > + break; > +#ifdef INET6 > + case AF_INET6: > + ifcap = IFCAP_TSOv6; > + break; > +#endif > + default: > + unhandled_af(dst->sa_family); > + } > + > + /* > + * Try to send with TSO first. When forwarding LRO may set > + * maximium segment size in mbuf header. Chop TCP segment > + * even if it would fit interface MTU to preserve maximum > + * path MTU. > + */ > + error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu); > + if (error || *mp == NULL) > + return error; > + > + if ((*mp)->m_pkthdr.len <= mtu) { > + switch (dst->sa_family) { > + case AF_INET: > + in_hdr_cksum_out(*mp, ifp); > + in_proto_cksum_out(*mp, ifp); > + break; > +#ifdef INET6 > + case AF_INET6: > + in6_proto_cksum_out(*mp, ifp); > + break; > +#endif > + } > + error = ifp->if_output(ifp, *mp, dst, rt); > + *mp = NULL; > + return error; > + } > + > + /* mp still contains mbuf that has to be fragmented or dropped. 
*/ > + return 0; > +} > + > +int > if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total, > struct sockaddr *dst, struct rtentry *rt) > { > Index: net/if_var.h > === > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v > retrieving revision 1.128 > diff -u -p -r1.128 if_var.h > --- net/if_var.h 28 Jun 2023 11:49:49 - 1.128 > +++ net/if_var.h 6 Jul 2023 19:12:39 - > @@ -329,6 +329,8 @@ int if_output_ml(struct ifnet *, struct > struct sockaddr *, struct rtentry *); > int if_output_mq(struct ifnet *, struct mbuf_queue *, unsigned int *, > struct sockaddr *, struct rtentry *); > +int if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *, > + struct rtentry *, u_int); > int if_output_local(struct ifnet *, struct mbuf *, sa_family_t); > void if_rtrequest_dummy(struct ifnet *, int, struct rtentry *); > void p2p_rtrequest(struct ifnet *, int, struct rtentry *); > Index: net/pf.c >
Re: tcp lro tso path mtu
On Mon, Jul 03, 2023 at 08:04:11PM +0300, Alexander Bluhm wrote: > As final step before making LRO (Large Receive Offload) the default, > we have to fix path MTU discovery when forwarding. > > The drivers, currently ix(4) and lo(4) only, record an upper bound > of the size of the original packets in ph_mss. When sending we > must chop the packets with TSO (TCP Segmentation Offload) to that > size. That means we have to call tcp_if_output_tso() before > ifp->if_output(). I have put that logic into if_output_tso() to > avoid code duplication. > > ok? I like the idea of this commit. Some comments below. Thanks, Jan > Index: net/if.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v > retrieving revision 1.702 > diff -u -p -r1.702 if.c > --- net/if.c 2 Jul 2023 19:59:15 - 1.702 > +++ net/if.c 3 Jul 2023 10:28:30 - > @@ -109,6 +109,9 @@ > #include > #include > #include I think is a merge bug, isn't it? > +#include > +#include > +#include > @@ -883,6 +886,57 @@ if_output_ml(struct ifnet *ifp, struct m > ml_purge(ml); > > return error; > +} > + > +int > +if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, > +struct rtentry *rt, u_int mtu) > +{ > + uint32_t ifcap; > + int error; > + > + switch (dst->sa_family) { > + case AF_INET: > + ifcap = IFCAP_TSOv4; > + break; > +#ifdef INET6 > + case AF_INET6: > + ifcap = IFCAP_TSOv6; > + break; > +#endif > + default: > + unhandled_af(dst->sa_family); > + } > + > + /* > + * Try to send with TSO first. When forwarding LRO may set > + * maximium segment size in mbuf header. Chop TCP segment > + * even if it would fit interface MTU to preserve maximum > + * path MTU. > + */ > + error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu); > + if (error || *mp == NULL) > + return error; > + > + if ((*mp)->m_pkthdr.len <= mtu) { I may miss something but... Couldn't you move the *_cksum_out() calls above the upper tcp_if_output_tso() call? And than remove the *_cksum_out() calls inside of tcp_if_output_tso()? 
Thus, there is just one place where we call them. > + switch (dst->sa_family) { > + case AF_INET: > + in_hdr_cksum_out(*mp, ifp); > + in_proto_cksum_out(*mp, ifp); > + break; > +#ifdef INET6 > + case AF_INET6: > + in6_proto_cksum_out(*mp, ifp); > + break; > +#endif > + } > + error = ifp->if_output(ifp, *mp, dst, rt); > + *mp = NULL; > + return error; > + } > + > + /* mp still contains mbuf that has to be fragmented or dropped. */ > + return 0; > }
Add ethernet type check in ifsetlro()
Hi, bluhm pointed out that the ether_brport_isset() check is only allowed on Ethernet devices. Thus, I added an Ethernet type check to the condition. This also fixes the EBUSY errors from "ifconfig lo0 tcplro" calls in my setup. ok? bye, Jan Index: net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.702 diff -u -p -r1.702 if.c --- net/if.c2 Jul 2023 19:59:15 - 1.702 +++ net/if.c3 Jul 2023 20:58:32 - @@ -3206,7 +3206,7 @@ ifsetlro(struct ifnet *ifp, int on) KERNEL_ASSERT_LOCKED(); /* for if_flags */ if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) { - if (ether_brport_isset(ifp)) { + if (ifp->if_type == IFT_ETHER && ether_brport_isset(ifp)) { error = EBUSY; goto out; }
Re: lo(4) loopback LRO and TSO
On July 2, 2023 2:33:41 PM GMT+02:00, Claudio Jeker wrote: >On Sun, Jul 02, 2023 at 02:28:17PM +0200, Alexander Bluhm wrote: >> anyone? > >Was not able to test yet but I like the diff. >Right now this is a noop since LRO is not on by default for lo(4). >Because of that OK claudio@ The diff works fine in my sparc64 setup. ok jan@ >> On Fri, Jun 23, 2023 at 06:06:16PM +0200, Alexander Bluhm wrote: >> > Hi, >> > >> > Claudio@ mentioned the idea to use TSO and LRO on the loopback >> > interface to transfer TCP faster. >> > >> > I see a performance effect with this diff, but more importantly it >> > gives us more test coverage. Currently LRO on lo(4) is default >> > off. >> > >> > Future plan is: >> > - Fix some corner cases for LRO/TSO with TCP path-MTU discovery >> > and IP forwarding when LRO is enabled. >> > - Enable LRO/TSO for lo(4) and ix(4) per default. >> > - Jan@ commits his ixl(4) TSO diff. >> > >> > ok for lo(4) LRO/TSO with default off? >> > >> > bluhm >> > >> > Index: sys/net/if.c >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v >> > retrieving revision 1.700 >> > diff -u -p -r1.700 if.c >> > --- sys/net/if.c 12 Jun 2023 21:19:54 - 1.700 >> > +++ sys/net/if.c 23 Jun 2023 15:48:27 - >> > @@ -106,6 +106,9 @@ >> > #ifdef MROUTING >> > #include >> > #endif >> > +#include >> > +#include >> > +#include >> > >> > #ifdef INET6 >> > #include >> > @@ -802,12 +805,29 @@ if_input_local(struct ifnet *ifp, struct >> > * is now incorrect, will be calculated before sending. 
>> > */ >> >keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT | >> > - M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT); >> > + M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT | >> > + M_TCP_TSO); >> >m_resethdr(m); >> >m->m_flags |= M_LOOP | keepflags; >> >m->m_pkthdr.csum_flags = keepcksum; >> >m->m_pkthdr.ph_ifidx = ifp->if_index; >> >m->m_pkthdr.ph_rtableid = ifp->if_rdomain; >> > + >> > + if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) { >> > + if (ifp->if_mtu > 0 && >> > + ((af == AF_INET && >> > + ISSET(ifp->if_capabilities, IFCAP_TSOv4)) || >> > + (af == AF_INET6 && >> > + ISSET(ifp->if_capabilities, IFCAP_TSOv6 { >> > + tcpstat_inc(tcps_inswlro); >> > + tcpstat_add(tcps_inpktlro, >> > + (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu); >> > + } else { >> > + tcpstat_inc(tcps_inbadlro); >> > + m_freem(m); >> > + return (EPROTONOSUPPORT); >> > + } >> > + } >> > >> >if (ISSET(keepcksum, M_TCP_CSUM_OUT)) >> >m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; >> > Index: sys/net/if_loop.c >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v >> > retrieving revision 1.94 >> > diff -u -p -r1.94 if_loop.c >> > --- sys/net/if_loop.c 5 Jun 2023 11:35:46 - 1.94 >> > +++ sys/net/if_loop.c 23 Jun 2023 15:48:27 - >> > @@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, >> >ifp->if_xflags = IFXF_CLONED; >> >ifp->if_capabilities = IFCAP_CSUM_IPv4 | >> >IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | >> > - IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; >> > + IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 | >> > + IFCAP_LRO; >> >ifp->if_rtrequest = lortrequest; >> >ifp->if_ioctl = loioctl; >> >ifp->if_input = loinput; >> > @@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, c >> > >> >switch (cmd) { >> >case SIOCSIFFLAGS: >> > + if (ISSET(ifp->if_xflags, IFXF_LRO)) >> > + SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); >> > + else >> > + CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6); >> >break; >> > >> >case SIOCSIFADDR: >> > Index: 
sys/netinet/tcp_usrreq.c >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v >> > retrieving revision 1.219 >> > diff -u -p -r1.219 tcp_usrreq.c >> > --- sys/netinet/tcp_usrreq.c 23 May 2023 09:16:16 - 1.219 >> > +++ sys/netinet/tcp_usrreq.c 23 Jun 2023 15:48:27 - >> > @@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o >> >ASSIGN(tcps_outhwtso); >> >ASSIGN(tcps_outpkttso); >> >ASSIGN(tcps_outbadtso); >> > + ASSIGN(tcps_inswlro); >> >ASSIGN(tcps_inhwlro); >> >ASSIGN(tcps_inpktlro); >> >ASSIGN(tcps_inbadlro); >> > Index: sys/netinet/tcp_var.h >> > === >> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v >> > retrieving revision 1.167 >> > diff -u -p -r1.167 tcp_var.h >> >
Re: tso ip6 forward
On Fri, Jun 16, 2023 at 12:06:08PM +0200, Alexander Bluhm wrote: > On Mon, Jun 12, 2023 at 03:46:28PM +0200, Alexander Bluhm wrote: > > I found a little inconsistency in IPv6 forwarding with TSO. > > > > Sending with TSO should only done if the large packet does not fit > > in the interface MTU. In case tcp_if_output_tso() does not process > > the packet, we should send an ICMP6 error. Rearrange the code that > > it looks more like other calls to tcp_if_output_tso(). > > > > All these cases can only be reached when LRO is turned on for IPv6 > > which none of our drivers currently supports. > > jan@ pointed out that reordering TSO in ip6 forward breaks path MTU > discovery. So lets only fix the forward counters, icmp6 packet too > big and icmp6 redirect. > > First try to send with TSO. The goto senderr handles icmp6 redirect > and other errors. > > If TSO is not necessary and the interface MTU fits, just send the > packet. Again goto senderr handles icmp6. > > Finally care about icmp6 packet too big. Works fine in my setup. > ok? ok jan@ > Index: netinet6/ip6_forward.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v > retrieving revision 1.110 > diff -u -p -r1.110 ip6_forward.c > --- netinet6/ip6_forward.c1 Jun 2023 09:05:33 - 1.110 > +++ netinet6/ip6_forward.c16 Jun 2023 08:55:43 - > @@ -321,35 +321,30 @@ reroute: > > error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6, > ifp->if_mtu); > + if (error) > + ip6stat_inc(ip6s_cantforward); > + else if (m == NULL) > + ip6stat_inc(ip6s_forward); > if (error || m == NULL) > - goto freecopy; > + goto senderr; > > /* Check the size after pf_test to give pf a chance to refragment. 
*/ > - if (m->m_pkthdr.len > ifp->if_mtu) { > - if (mcopy) > - icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, > - ifp->if_mtu); > - m_freem(m); > - goto out; > + if (m->m_pkthdr.len <= ifp->if_mtu) { > + in6_proto_cksum_out(m, ifp); > + error = ifp->if_output(ifp, m, sin6tosa(sin6), rt); > + if (error) > + ip6stat_inc(ip6s_cantforward); > + else > + ip6stat_inc(ip6s_forward); > + goto senderr; > } > > - in6_proto_cksum_out(m, ifp); > - error = ifp->if_output(ifp, m, sin6tosa(sin6), rt); > - if (error) { > - ip6stat_inc(ip6s_cantforward); > - } else { > - ip6stat_inc(ip6s_forward); > - if (type) > - ip6stat_inc(ip6s_redirectsent); > - else { > - if (mcopy) > - goto freecopy; > - } > - } > + if (mcopy != NULL) > + icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); > + m_freem(m); > + goto out; > > -#if NPF > 0 || defined(IPSEC) > senderr: > -#endif > if (mcopy == NULL) > goto out; > > @@ -357,6 +352,7 @@ senderr: > case 0: > if (type == ND_REDIRECT) { > icmp6_redirect_output(mcopy, rt); > + ip6stat_inc(ip6s_redirectsent); > goto out; > } > goto freecopy; >
Re: ix(4): allocate less memory for tx buffers
On Fri, Jun 09, 2023 at 06:59:57PM +0200, Jan Klemkow wrote: > On Fri, Jun 09, 2023 at 06:11:38PM +0200, Jan Klemkow wrote: > > TSO packets are limited to MAXMCLBYTES (64k). Thus, we don't need to > > allocate IXGBE_TSO_SIZE (256k) per packet for the transmit buffers. > > > > This saves 3/4 of the memory and allows me to pack over 8 ix(8) ports > > into one machine. Otherwise I run out of devbuf in malloc(9). > > fix typo in comment Use a more precise compare in the CTASSERT condition. ok? Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.197 diff -u -p -r1.197 if_ix.c --- dev/pci/if_ix.c 1 Jun 2023 09:05:33 - 1.197 +++ dev/pci/if_ix.c 9 Jun 2023 16:01:18 - @@ -37,6 +37,12 @@ #include #include +/* + * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES. + * This interface could not handle packets greater than IXGBE_TSO_SIZE. + */ +CTASSERT(MAXMCLBYTES <= IXGBE_TSO_SIZE); + /* * Driver version */ @@ -2263,7 +2269,7 @@ ixgbe_allocate_transmit_buffers(struct t /* Create the descriptor buffer dma maps */ for (i = 0; i < sc->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; - error = bus_dmamap_create(txr->txdma.dma_tag, IXGBE_TSO_SIZE, + error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES, sc->num_segs, PAGE_SIZE, 0, BUS_DMA_NOWAIT, &txbuf->map);
Re: ix(4): allocate less memory for tx buffers
On Fri, Jun 09, 2023 at 06:11:38PM +0200, Jan Klemkow wrote: > TSO packets are limited to MAXMCLBYTES (64k). Thus, we don't need to > allocate IXGBE_TSO_SIZE (256k) per packet for the transmit buffers. > > This saves 3/4 of the memory and allows me to pack over 8 ix(8) ports > into one machine. Otherwise I run out of devbuf in malloc(9). > > ok? fix typo in comment Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.197 diff -u -p -r1.197 if_ix.c --- dev/pci/if_ix.c 1 Jun 2023 09:05:33 - 1.197 +++ dev/pci/if_ix.c 9 Jun 2023 16:01:18 - @@ -37,6 +37,12 @@ #include #include +/* + * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES. + * This interface could not handle packets greater than IXGBE_TSO_SIZE. + */ +CTASSERT(MAXMCLBYTES < IXGBE_TSO_SIZE); + /* * Driver version */ @@ -2263,7 +2269,7 @@ ixgbe_allocate_transmit_buffers(struct t /* Create the descriptor buffer dma maps */ for (i = 0; i < sc->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; - error = bus_dmamap_create(txr->txdma.dma_tag, IXGBE_TSO_SIZE, + error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES, sc->num_segs, PAGE_SIZE, 0, BUS_DMA_NOWAIT, &txbuf->map);
ix(4): allocate less memory for tx buffers
Hi, TSO packets are limited to MAXMCLBYTES (64k). Thus, we don't need to allocate IXGBE_TSO_SIZE (256k) per packet for the transmit buffers. This saves 3/4 of the memory and allows me to pack over 8 ix(8) ports into one machine. Otherwise I run out of devbuf in malloc(9). ok? bye, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.197 diff -u -p -r1.197 if_ix.c --- dev/pci/if_ix.c 1 Jun 2023 09:05:33 - 1.197 +++ dev/pci/if_ix.c 9 Jun 2023 16:01:18 - @@ -37,6 +37,12 @@ #include #include +/* + * Our TCP/IP Stack could not handle packets greater then MAXMCLBYTES. + * This interface could not handle packets greater then IXGBE_TSO_SIZE. + */ +CTASSERT(MAXMCLBYTES < IXGBE_TSO_SIZE); + /* * Driver version */ @@ -2263,7 +2269,7 @@ ixgbe_allocate_transmit_buffers(struct t /* Create the descriptor buffer dma maps */ for (i = 0; i < sc->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; - error = bus_dmamap_create(txr->txdma.dma_tag, IXGBE_TSO_SIZE, + error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES, sc->num_segs, PAGE_SIZE, 0, BUS_DMA_NOWAIT, &txbuf->map);
Re: ifconfig rename tcplro
On Wed, Jun 07, 2023 at 02:49:07PM +0300, Vitaliy Makkoveev wrote: > On Wed, Jun 07, 2023 at 01:29:09PM +0200, Alexander Bluhm wrote: > > On Wed, Jun 07, 2023 at 12:59:11PM +0300, Vitaliy Makkoveev wrote: > > > On Wed, Jun 07, 2023 at 10:19:32AM +1000, David Gwynne wrote: > > > > > > > > > > > > > On 7 Jun 2023, at 06:33, Vitaliy Makkoveev wrote: > > > > > > > > > >> On 6 Jun 2023, at 20:29, Alexander Bluhm > > > > >> wrote: > > > > >> > > > > >> On Tue, Jun 06, 2023 at 05:54:31PM +0300, Vitaliy Makkoveev wrote: > > > > >>> On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote: > > > > Hi, > > > > > > > > I would suggest to rename ifconfig tcprecvoffload to tcplro. Maybe > > > > it's just because I had to type that long name too often. > > > > > > > > With that we have consistent naming: > > > > # ifconfig ix0 tcplro > > > > # sysctl net.inet.tcp.tso=1 > > > > > > > > Also the coresponding flag are named LRO. > > > > # ifconfig ix1 hwfeatures > > > > ix1: flags=2008843 mtu > > > > 1500 > > > > > > > > hwfeatures=71b7 > > > > hardmtu 9198 > > > > > > > > The feature is quite new, so I have no backward compatiblity > > > > concerns. > > > > > > > > ok? > > > > > > > > >>> > > > > >>> Could you name it "lro" like FreeBSD uses? > > > > >> > > > > >> When I started with this, LRO and TSO were unknown to me. So with > > > > >> TCP prefix it may be clearer to users where the feature belongs. > > > > >> > > > > >> Naming is hard. > > > > > > > > > > Yeah, naming is definitely hard. I propose to use lro because it is > > > > > already used for the same purpose by FreeBSD, so the same name helps > > > > > to avoid confusion. > > > > > > > > > >lro If the driver supports tcp(4) large receive offloading, > > > > >enable LRO on the interface. > > > > > > > > > > Also, we have used "tso" keyword for tcp segmentation offloading for > > > > > the same reason, until it became global net.inet.tcp.tso. 
> > > > > > > > Is it going to be used to enable lro for udp and other protocols as > > > > well? > > > > > > Why not? We have tso feature system wide, so why don't have receive > > > offloading feature global for all supported protocols? Especially since > > > I suspect this control will be moved from ifconfig to global > > > net.inet.tcp.lro like net.inet.tcp.tso. > > > > Maybe we can make lro the default, and then move it to net.inet.tcp.lro. > > But I like to see another driver to implement it first. > > > > > However, I'm not the fan of original "tcprecvoffload" and like shorter > > > naming. > > > > Can we use ifconfig tcplro for now? > > + it only affects TCP > > + user see that it is related to TCP > > + it is not a 3 letter abrevation claudio does not like > > + it is shorter than tcprecvoffload > > > > cons > > - FreeBSD calls it lro > > > > Feel free to use tcplro. Do so. OK jan@
Re: ifconfig rename tcplro
On Tue, Jun 06, 2023 at 09:37:22AM -0700, Chris Cappuccio wrote: > Jan Klemkow [j.klem...@wemelug.de] wrote: > > On Tue, Jun 06, 2023 at 05:54:31PM +0300, Vitaliy Makkoveev wrote: > > > On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote: > > > > I would suggest to rename ifconfig tcprecvoffload to tcplro. Maybe > > > > it's just because I had to type that long name too often. > > > > > > > > With that we have consistent naming: > > > > # ifconfig ix0 tcplro > > > > # sysctl net.inet.tcp.tso=1 > > > > > > > > Also the coresponding flag are named LRO. > > > > # ifconfig ix1 hwfeatures > > > > ix1: flags=2008843 mtu 1500 > > > > > > > > hwfeatures=71b7 > > > > hardmtu 9198 > > > > > > > > The feature is quite new, so I have no backward compatiblity concerns. > > > > > > > > ok? > > > > > > Could you name it "lro" like FreeBSD uses? > > > > I also would prefer this one. > > and tcpsendoffload back to tso ? > > was the reason for changing it from tso due to the initial conflation > of TSO and LRO in the tree? Yes. At the start of this, I just want to keep it simple with one ifconfig option "tso". But, tso is now default in tcp_output() and can be controlled globally via sysctl(2) net.inet.tcp.tso. Thus, we just need to control LRO per interface.
Re: ifconfig rename tcplro
On Tue, Jun 06, 2023 at 05:54:31PM +0300, Vitaliy Makkoveev wrote: > On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote: > > I would suggest to rename ifconfig tcprecvoffload to tcplro. Maybe > > it's just because I had to type that long name too often. > > > > With that we have consistent naming: > > # ifconfig ix0 tcplro > > # sysctl net.inet.tcp.tso=1 > > > > Also the coresponding flag are named LRO. > > # ifconfig ix1 hwfeatures > > ix1: flags=2008843 mtu 1500 > > > > hwfeatures=71b7 > > hardmtu 9198 > > > > The feature is quite new, so I have no backward compatiblity concerns. > > > > ok? > > Could you name it "lro" like FreeBSD uses? I also would prefer this one.
Re: ifconfig rename tcplro
On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote: > I would suggest to rename ifconfig tcprecvoffload to tcplro. Maybe > it's just because I had to type that long name too often. > > With that we have consistent naming: > # ifconfig ix0 tcplro > # sysctl net.inet.tcp.tso=1 > > Also the coresponding flag are named LRO. > # ifconfig ix1 hwfeatures > ix1: flags=2008843 mtu 1500 > > hwfeatures=71b7 > hardmtu 9198 > > The feature is quite new, so I have no backward compatiblity concerns. > > ok? I like this shorter naming. Its OK from my side. > Index: sbin/ifconfig/ifconfig.8 > === > RCS file: /data/mirror/openbsd/cvs/src/sbin/ifconfig/ifconfig.8,v > retrieving revision 1.396 > diff -u -p -r1.396 ifconfig.8 > --- sbin/ifconfig/ifconfig.8 1 Jun 2023 18:57:53 - 1.396 > +++ sbin/ifconfig/ifconfig.8 6 Jun 2023 12:18:07 - > @@ -501,7 +501,7 @@ Query and display information and diagno > modules installed in an interface. > It is only supported by drivers implementing the necessary functionality > on hardware which supports it. > -.It Cm tcprecvoffload > +.It Cm tcplro > Enable TCP large receive offload (LRO) if it's supported by the hardware; see > .Cm hwfeatures . > LRO enabled network interfaces modify received TCP/IP packets. > @@ -517,7 +517,7 @@ It is not possible to use LRO with inter > or > .Xr tpmr 4 . > Changing this option will re-initialize the network interface. > -.It Cm -tcprecvoffload > +.It Cm -tcplro > Disable LRO. > LRO is disabled by default. 
> .It Cm up > Index: sbin/ifconfig/ifconfig.c > === > RCS file: /data/mirror/openbsd/cvs/src/sbin/ifconfig/ifconfig.c,v > retrieving revision 1.465 > diff -u -p -r1.465 ifconfig.c > --- sbin/ifconfig/ifconfig.c 1 Jun 2023 18:57:54 - 1.465 > +++ sbin/ifconfig/ifconfig.c 6 Jun 2023 12:18:59 - > @@ -471,8 +471,8 @@ const struct cmd { > { "-soii", IFXF_INET6_NOSOII, 0, setifxflags }, > { "monitor",IFXF_MONITOR, 0, setifxflags }, > { "-monitor", -IFXF_MONITOR, 0, setifxflags }, > - { "tcprecvoffload", IFXF_LRO, 0, setifxflags }, > - { "-tcprecvoffload", -IFXF_LRO, 0, setifxflags }, > + { "tcplro", IFXF_LRO, 0, setifxflags }, > + { "-tcplro",-IFXF_LRO, 0, setifxflags }, > #ifndef SMALL > { "hwfeatures", NEXTARG0, 0, printifhwfeatures }, > { "metric", NEXTARG,0, setifmetric }, >
Re: Virtio fix for testing
On Wed, May 24, 2023 at 08:50:26PM +0200, Stefan Fritsch wrote: > I forgot to mention that no stress test is necessary. If it boots and the > virtio devices work at all, that should be enough. Works for me on Linux/KVM with the following devices: vga1 at pci0 dev 2 function 0 "Qumranet Virtio 1.x GPU" rev 0x01 virtio0 at pci0 dev 4 function 0 "Qumranet Virtio Storage" rev 0x00 virtio1 at pci0 dev 6 function 0 "Qumranet Virtio Console" rev 0x00 virtio2 at pci0 dev 7 function 0 "Qumranet Virtio Memory Balloon" rev 0x00 virtio3 at pci0 dev 8 function 0 "Qumranet Virtio Network" rev 0x00 and on OpenBSD/VMM with: virtio0 at pci0 dev 1 function 0 "Qumranet Virtio RNG" rev 0x00 virtio1 at pci0 dev 2 function 0 "Qumranet Virtio Network" rev 0x00 virtio2 at pci0 dev 3 function 0 "Qumranet Virtio Storage" rev 0x00 virtio3 at pci0 dev 4 function 0 "Qumranet Virtio SCSI" rev 0x00 Thanks, Jan
Re: ix(4): LRO forwarding
On Wed, May 24, 2023 at 05:28:58PM +0200, Alexander Bluhm wrote: > On Tue, May 23, 2023 at 02:14:57PM +0200, Jan Klemkow wrote: > > Hi, > > > > This diff sets needed offloading flags and the calculated mss to LRO > > mbufs in ix(4). Thus, we can forward this packets and process them via > > tcp_if_output_tso(). This diff also uses tcp_if_output_tso() in > > ip6_forward(). > > > > I tested the ip6_forward path via the address family transition in pf: > > > > pass in inet from 192.168.1.1 to 192.168.13.2 af-to \ > > inet6 from fc00:13::1 to fc00:13::2 > > > > ok? > > crashes during my tests with lro turned on. Looks like devision > by zero. I added a check, that avoids the TSO flags if mss it zero. Thus, we avoid a division by zero in later TSO processing. ok? Thanks, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.196 diff -u -p -r1.196 if_ix.c --- dev/pci/if_ix.c 23 May 2023 09:16:16 - 1.196 +++ dev/pci/if_ix.c 25 May 2023 20:02:06 - @@ -3257,13 +3257,40 @@ ixgbe_rxeof(struct rx_ring *rxr) if (sendmp->m_pkthdr.ph_mss > 0) { struct ether_extracted ext; + uint64_t hlen; uint16_t pkts = sendmp->m_pkthdr.ph_mss; + /* Calculate header size. */ ether_extract_headers(sendmp, &ext); - if (ext.tcp) + hlen = sizeof(*ext.eh); + if (ext.ip4) { + hlen += ext.ip4->ip_hl << 2; + } else if (ext.ip6) { + if (ext.ip6->ip6_nxt == IPPROTO_TCP) + hlen += sizeof(*ext.ip6); + else + tcpstat_inc(tcps_inbadlro); + } + if (ext.tcp) { tcpstat_inc(tcps_inhwlro); - else + hlen += ext.tcp->th_off << 2; + } else { tcpstat_inc(tcps_inbadlro); + } + + /* +* If we gonna forward this packet, we have to +* mark it as TSO, recalculate the TCP checksum +* and set a correct mss. 
+*/ + sendmp->m_pkthdr.ph_mss = + (sendmp->m_pkthdr.len - hlen) / pkts; + + if (sendmp->m_pkthdr.ph_mss != 0) { + SET(sendmp->m_pkthdr.csum_flags, + M_TCP_CSUM_OUT | M_TCP_TSO); + } + tcpstat_add(tcps_inpktlro, pkts); } Index: netinet6/ip6_forward.c === RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v retrieving revision 1.109 diff -u -p -r1.109 ip6_forward.c --- netinet6/ip6_forward.c 5 Apr 2023 13:56:31 - 1.109 +++ netinet6/ip6_forward.c 25 May 2023 20:03:06 - @@ -63,8 +63,10 @@ #include #include #include -#include #endif +#include +#include +#include /* * Forward a packet. If some error occurs return the sender @@ -316,7 +318,11 @@ reroute: goto reroute; } #endif - in6_proto_cksum_out(m, ifp); + + error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6, + ifp->if_mtu); + if (error || m == NULL) + goto freecopy; /* Check the size after pf_test to give pf a chance to refragment. */ if (m->m_pkthdr.len > ifp->if_mtu) { @@ -326,6 +332,8 @@ reroute: m_freem(m); goto out; } + + in6_proto_cksum_out(m, ifp); error = ifp->if_output(ifp, m, sin6tosa(sin6), rt); if (error) {
ix(4): LRO forwarding
Hi, This diff sets needed offloading flags and the calculated mss to LRO mbufs in ix(4). Thus, we can forward this packets and process them via tcp_if_output_tso(). This diff also uses tcp_if_output_tso() in ip6_forward(). I tested the ip6_forward path via the address family transition in pf: pass in inet from 192.168.1.1 to 192.168.13.2 af-to \ inet6 from fc00:13::1 to fc00:13::2 ok? bye, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.196 diff -u -p -r1.196 if_ix.c --- dev/pci/if_ix.c 23 May 2023 09:16:16 - 1.196 +++ dev/pci/if_ix.c 23 May 2023 11:02:52 - @@ -3257,13 +3257,38 @@ ixgbe_rxeof(struct rx_ring *rxr) if (sendmp->m_pkthdr.ph_mss > 0) { struct ether_extracted ext; + uint64_t hlen; uint16_t pkts = sendmp->m_pkthdr.ph_mss; + /* Calculate header size. */ ether_extract_headers(sendmp, &ext); - if (ext.tcp) + hlen = sizeof(*ext.eh); + if (ext.ip4) { + hlen += ext.ip4->ip_hl << 2; + } else if (ext.ip6) { + if (ext.ip6->ip6_nxt == IPPROTO_TCP) + hlen += sizeof(*ext.ip6); + else + tcpstat_inc(tcps_inbadlro); + } + if (ext.tcp) { tcpstat_inc(tcps_inhwlro); - else + hlen += ext.tcp->th_off << 2; + } else { tcpstat_inc(tcps_inbadlro); + } + + /* +* If we gonna forward this packet, we have to +* mark it as TSO, recalculate the TCP checksum +* and set a correct mss. +*/ + SET(sendmp->m_pkthdr.csum_flags, + M_TCP_CSUM_OUT | M_TCP_TSO); + + sendmp->m_pkthdr.ph_mss = + (sendmp->m_pkthdr.len - hlen) / pkts; + tcpstat_add(tcps_inpktlro, pkts); } Index: netinet6/ip6_forward.c === RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v retrieving revision 1.109 diff -u -p -r1.109 ip6_forward.c --- netinet6/ip6_forward.c 5 Apr 2023 13:56:31 - 1.109 +++ netinet6/ip6_forward.c 23 May 2023 11:59:19 - @@ -63,8 +63,10 @@ #include #include #include -#include #endif +#include +#include +#include /* * Forward a packet. 
If some error occurs return the sender @@ -316,7 +318,11 @@ reroute: goto reroute; } #endif - in6_proto_cksum_out(m, ifp); + + error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6, + ifp->if_mtu); + if (error || m == NULL) + goto freecopy; /* Check the size after pf_test to give pf a chance to refragment. */ if (m->m_pkthdr.len > ifp->if_mtu) { @@ -326,6 +332,8 @@ reroute: m_freem(m); goto out; } + + in6_proto_cksum_out(m, ifp); error = ifp->if_output(ifp, m, sin6tosa(sin6), rt); if (error) {
Fix wrong interface mtu in tcp_mss
Hi, We use the wrong interface and mtu in tcp_mss() to calculate the mss if the destination address is a local address. In ip_output() we use the correct interface and its mtu. This limits the mss to 1448 if the mtu of the interface is 1500, instead of using a local 32k mss. The bigger issue is: local bulk traffic with the current TSO implementation is broken. tcp_output() creates TSO packets with an mss smaller than 32k and ip_output() calls if_output instead of tcp_if_output_tso() because it fits into the mtu check of lo0. This diff takes the same logic to pick the interface in tcp_mss() as is done in ip_output() and fixes both issues. ok? bye, Jan Index: netinet/tcp_input.c === RCS file: /cvs/src/sys/netinet/tcp_input.c,v retrieving revision 1.387 diff -u -p -r1.387 tcp_input.c --- netinet/tcp_input.c 14 Mar 2023 00:24:05 - 1.387 +++ netinet/tcp_input.c 19 May 2023 17:22:47 - @@ -2805,7 +2805,11 @@ tcp_mss(struct tcpcb *tp, int offer) if (rt == NULL) goto out; - ifp = if_get(rt->rt_ifidx); + if (ISSET(rt->rt_flags, RTF_LOCAL)) + ifp = if_get(rtable_loindex(inp->inp_rtableid)); + else + ifp = if_get(rt->rt_ifidx); + if (ifp == NULL) goto out;
Re: Add LRO counter in ix(4)
On Thu, May 18, 2023 at 12:01:44AM +0200, Alexander Bluhm wrote: > On Tue, May 16, 2023 at 09:11:48PM +0200, Jan Klemkow wrote: > > @@ -412,6 +412,10 @@ tcp_stats(char *name) > > p(tcps_outhwtso, "\t\t%u output TSO packet%s hardware processed\n"); > > p(tcps_outpkttso, "\t\t%u output TSO packet%s generated\n"); > > p(tcps_outbadtso, "\t\t%u output TSO packet%s dropped\n"); > > + p(tcps_inhwlro, "\t\t%u input LRO generated packet%s from hardware\n"); > > + p(tcps_inpktlro, "\t\t%u input LRO coalesced packet%s from hardware\n"); > > ... coalesced packet%s by hardware done > > + p(tcps_inbadlro, "\t\t%u input bad LRO packet%s from hardware\n"); > > + > > Move this down to the "packets received" section. You included it > in "packets sent". done > > + /* > > +* This function iterates over interleaved descriptors. > > +* Thus, we reuse ph_mss as global segment counter per > > +* TCP connection, insteat of introducing a new variable > > s/insteat/instead/ done ok? Thanks, Jan diff --git a/sys/dev/pci/if_ix.c b/sys/dev/pci/if_ix.c index 4119a2416dc..924a6d63236 100644 --- a/sys/dev/pci/if_ix.c +++ b/sys/dev/pci/if_ix.c @@ -3214,12 +3214,23 @@ ixgbe_rxeof(struct rx_ring *rxr) sendmp = rxbuf->fmp; rxbuf->buf = rxbuf->fmp = NULL; - if (sendmp != NULL) /* secondary frag */ + if (sendmp != NULL) { /* secondary frag */ sendmp->m_pkthdr.len += mp->m_len; - else { + + /* +* This function iterates over interleaved descriptors. +* Thus, we reuse ph_mss as global segment counter per +* TCP connection, instead of introducing a new variable +* in m_pkthdr. 
+*/ + if (rsccnt) + sendmp->m_pkthdr.ph_mss += rsccnt - 1; + } else { /* first desc of a non-ps chain */ sendmp = mp; sendmp->m_pkthdr.len = mp->m_len; + if (rsccnt) + sendmp->m_pkthdr.ph_mss = rsccnt - 1; #if NVLAN > 0 if (sc->vlan_stripping && staterr & IXGBE_RXD_STAT_VP) { sendmp->m_pkthdr.ether_vtag = vtag; @@ -3241,6 +3252,21 @@ ixgbe_rxeof(struct rx_ring *rxr) SET(sendmp->m_pkthdr.csum_flags, M_FLOWID); } + if (sendmp->m_pkthdr.ph_mss == 1) + sendmp->m_pkthdr.ph_mss = 0; + + if (sendmp->m_pkthdr.ph_mss > 0) { + struct ether_extracted ext; + uint16_t pkts = sendmp->m_pkthdr.ph_mss; + + ether_extract_headers(sendmp, &ext); + if (ext.tcp) + tcpstat_inc(tcps_inhwlro); + else + tcpstat_inc(tcps_inbadlro); + tcpstat_add(tcps_inpktlro, pkts); + } + ml_enqueue(&ml, sendmp); } next_desc: diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 120e3cc5ea7..3970636cde1 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1340,6 +1340,9 @@ tcp_sysctl_tcpstat(void *oldp, size_t *oldlenp, void *newp) ASSIGN(tcps_outhwtso); ASSIGN(tcps_outpkttso); ASSIGN(tcps_outbadtso); + ASSIGN(tcps_inhwlro); + ASSIGN(tcps_inpktlro); + ASSIGN(tcps_inbadlro); #undef ASSIGN diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 0a9630d719f..e706fedd0e7 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -447,6 +447,9 @@ struct tcpstat { u_int32_t tcps_outhwtso;/* output tso processed by hardware */ u_int32_t tcps_outpkttso; /* packets generated by tso */ u_int32_t tcps_outbadtso; /* output tso failed, packet dropped */ + u_int32_t tcps_inhwlro; /* input lro from hardware */ + u_int32_t tcps_inpktlro;/* packets coalessed by hardware lro */ + u_int32_t tcps_inbadlro;/* input bad lro packets from hardware */ }; /* @@ -625,6 +628,9 @@ enum tcpstat_counters { tcps_outhwtso, tcps_outpkttso, tcps_outbadtso, + tcps_inhwlro, + tcps_inpktlro, + tcps_inbadlro, tcps_ncounters, };
Add LRO counter in ix(4)
Hi, This diff introduces new counters for LRO packets, we get from the network interface. It shows, how many packets the network interface has coalesced into LRO packets. In followup diff, this packet counter will also be used to set the ph_mss variable to valid value. So, the stack is able to forward or redirect this kind of packets. ok? bye, Jan Index: usr.bin/netstat/inet.c === RCS file: /cvs/src/usr.bin/netstat/inet.c,v retrieving revision 1.175 diff -u -p -r1.175 inet.c --- usr.bin/netstat/inet.c 10 May 2023 12:07:17 - 1.175 +++ usr.bin/netstat/inet.c 16 May 2023 17:55:20 - @@ -412,6 +412,10 @@ tcp_stats(char *name) p(tcps_outhwtso, "\t\t%u output TSO packet%s hardware processed\n"); p(tcps_outpkttso, "\t\t%u output TSO packet%s generated\n"); p(tcps_outbadtso, "\t\t%u output TSO packet%s dropped\n"); + p(tcps_inhwlro, "\t\t%u input LRO generated packet%s from hardware\n"); + p(tcps_inpktlro, "\t\t%u input LRO coalesced packet%s from hardware\n"); + p(tcps_inbadlro, "\t\t%u input bad LRO packet%s from hardware\n"); + p(tcps_rcvtotal, "\t%u packet%s received\n"); p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t%u ack%s (for %llu byte%s)\n"); p(tcps_rcvdupack, "\t\t%u duplicate ack%s\n"); Index: sys/dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.194 diff -u -p -r1.194 if_ix.c --- sys/dev/pci/if_ix.c 16 May 2023 14:32:54 - 1.194 +++ sys/dev/pci/if_ix.c 16 May 2023 18:49:33 - @@ -3175,12 +3175,23 @@ ixgbe_rxeof(struct rx_ring *rxr) sendmp = rxbuf->fmp; rxbuf->buf = rxbuf->fmp = NULL; - if (sendmp != NULL) /* secondary frag */ + if (sendmp != NULL) { /* secondary frag */ sendmp->m_pkthdr.len += mp->m_len; - else { + + /* +* This function iterates over interleaved descriptors. +* Thus, we reuse ph_mss as global segment counter per +* TCP connection, insteat of introducing a new variable +* in m_pkthdr. 
+*/ + if (rsccnt) + sendmp->m_pkthdr.ph_mss += rsccnt - 1; + } else { /* first desc of a non-ps chain */ sendmp = mp; sendmp->m_pkthdr.len = mp->m_len; + if (rsccnt) + sendmp->m_pkthdr.ph_mss = rsccnt - 1; #if NVLAN > 0 if (sc->vlan_stripping && staterr & IXGBE_RXD_STAT_VP) { sendmp->m_pkthdr.ether_vtag = vtag; @@ -3200,6 +3211,21 @@ ixgbe_rxeof(struct rx_ring *rxr) if (hashtype != IXGBE_RXDADV_RSSTYPE_NONE) { sendmp->m_pkthdr.ph_flowid = hash; SET(sendmp->m_pkthdr.csum_flags, M_FLOWID); + } + + if (sendmp->m_pkthdr.ph_mss == 1) + sendmp->m_pkthdr.ph_mss = 0; + + if (sendmp->m_pkthdr.ph_mss > 0) { + struct ether_extracted ext; + uint16_t pkts = sendmp->m_pkthdr.ph_mss; + + ether_extract_headers(sendmp, &ext); + if (ext.tcp) + tcpstat_inc(tcps_inhwlro); + else + tcpstat_inc(tcps_inbadlro); + tcpstat_add(tcps_inpktlro, pkts); } ml_enqueue(&ml, sendmp); Index: sys/dev/pci/ixgbe.h === RCS file: /cvs/src/sys/dev/pci/ixgbe.h,v retrieving revision 1.33 diff -u -p -r1.33 ixgbe.h --- sys/dev/pci/ixgbe.h 8 Feb 2022 03:38:00 - 1.33 +++ sys/dev/pci/ixgbe.h 16 May 2023 17:55:20 - @@ -60,12 +60,18 @@ #include #include +#include #include +struct tdb; + #include #include #include #include +#include +#include +#include #if NBPFILTER > 0 #include Index: sys/netinet/tcp_usrreq.c === RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.218 diff -u -p -r1.218 tcp_usrreq.c --- sys/netinet/tcp_usrreq.c10 May 2023 12:07:16 - 1.218 +++ sys/netinet/tcp_usrreq.c16 May 2023 17:55:20 - @@ -1340,6 +1340,9 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o ASSIGN(tcps_outhwtso); ASSIGN(tcps_outpkttso); ASSIGN(tcps_outbadtso); + ASSIGN(tcps_inhwlro); + ASSIGN(tcps_inpktlro); + ASSIG
Re: separate LRO/TSO flags
On Mon, May 15, 2023 at 11:40:20AM +0200, Alexander Bluhm wrote: > On Mon, May 15, 2023 at 09:34:21AM +0200, Jan Klemkow wrote: > > @@ -251,12 +251,16 @@ struct if_status_description { > > #defineIFCAP_VLAN_HWTAGGING0x0020 /* hardware VLAN tag > > support */ > > #defineIFCAP_CSUM_TCPv60x0080 /* can do IPv6/TCP > > checksums */ > > #defineIFCAP_CSUM_UDPv60x0100 /* can do IPv6/UDP > > checksums */ > > -#defineIFCAP_TSO 0x4000 /* TCP segment > > offloading */ > > +#defineIFCAP_LRO 0x1000 /* TCP large recv > > offload */ > > +#defineIFCAP_TSOv4 0x2000 /* TCP segmentation > > offload */ > > +#defineIFCAP_TSOv6 0x4000 /* TCP segmentation > > offload */ > > #defineIFCAP_WOL 0x8000 /* can do wake on lan */ > > I would prefer to keep the numbers of IFCAP_TSO/IFCAP_LRO as this > is just a naming error. Then we have less confusion during the > ifconfig transition phase. > > +#define IFCAP_TSOv4 0x1000 > +#define IFCAP_TSOv6 0x2000 > -#define IFCAP_TSO0x4000 > +#define IFCAP_LRO0x4000 > > > +#define IFCAP_TSO (IFCAP_TSOv4 | IFCAP_TSOv6) > > + > > Could you please remove this chunk and expand it, where is used? > This one more define does not make the code clearer. And this flag > IFCAP_TSO had a different meaning before renaming. When it is not > introduced again, the compiler makes sure that no renaming was > forgotten. done Also: - updated the diff to the current source state - improved the vlan(4) capability handling @dlg: Whats your opinion about this diff? ok? Thanks, Jan Index: sbin/ifconfig/ifconfig.8 === RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v retrieving revision 1.394 diff -u -p -r1.394 ifconfig.8 --- sbin/ifconfig/ifconfig.826 Apr 2023 02:38:08 - 1.394 +++ sbin/ifconfig/ifconfig.815 May 2023 18:46:48 - @@ -282,8 +282,18 @@ tag. As CSUM_TCPv4, but supports IPv6 datagrams. .It Sy CSUM_UDPv6 As above, for UDP. -.It Sy TSO -The device supports TCP segment offloading (TSO). +.It Sy LRO +The device supports TCP large receive offload (LRO). 
+.It Sy TSOv4 +The device supports IPv4 TCP segmentation offload (TSO). +TSO is used by default. +Use the +.Xr sysctl 8 +variable +.Va net.inet.tcp.tso +to disable this feature. +.It Sy TSOv6 +As above, for IPv6. .It Sy WOL The device supports Wake on LAN (WoL). .It Sy hardmtu @@ -491,25 +501,25 @@ Query and display information and diagno modules installed in an interface. It is only supported by drivers implementing the necessary functionality on hardware which supports it. -.It Cm tso -Enable TCP segmentation offloading (TSO) if it's supported by the hardware; see +.It Cm tcprecvoffload +Enable TCP large receive offload (LRO) if it's supported by the hardware; see .Cm hwfeatures . -TSO enabled NICs modify received TCP/IP packets. +LRO enabled network interfaces modify received TCP/IP packets. This will also affect traffic of upper layer interfaces, such as .Xr vlan 4 , .Xr aggr 4 , and .Xr carp 4 . -It is not possible to use TSO with interfaces attached to a +It is not possible to use LRO with interfaces attached to a .Xr bridge 4 , .Xr veb 4 , or .Xr tpmr 4 . Changing this option will re-initialize the network interface. -.It Cm -tso -Disable TSO. -TSO is disabled by default. +.It Cm -tcprecvoffload +Disable LRO. +LRO is disabled by default. .It Cm up Mark an interface .Dq up . 
Index: sbin/ifconfig/ifconfig.c === RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.463 diff -u -p -r1.463 ifconfig.c --- sbin/ifconfig/ifconfig.c12 May 2023 18:24:13 - 1.463 +++ sbin/ifconfig/ifconfig.c15 May 2023 20:27:51 - @@ -126,7 +126,7 @@ #define HWFEATURESBITS \ "\024\1CSUM_IPv4\2CSUM_TCPv4\3CSUM_UDPv4" \ "\5VLAN_MTU\6VLAN_HWTAGGING\10CSUM_TCPv6" \ - "\11CSUM_UDPv6\17TSO\20WOL" + "\11CSUM_UDPv6\15TSOv4\16TSOv6\17LSO\20WOL" struct ifencap { unsigned int ife_flags; @@ -469,8 +469,8 @@ const structcmd { { "-soii", IFXF_INET6_NOSOII, 0, setifxflags }, { "monitor",IFXF_MONITOR, 0, setifxflags }, { "-monitor", -IFXF_MONITOR, 0, setifxflags }, - { "tso",IFXF_TSO, 0, setifxflags }, - { "-tso", -IFXF_TSO, 0, setifxf
Re: separate LRO/TSO flags
On Sat, May 13, 2023 at 04:44:18PM +0200, Christian Weisgerber wrote: > Jan Klemkow: > > > This diff introduces separate flags for TCP offloading. We split this > > into LRO (large receive offloading) and TSO (TCP segmentation > > offloading). Thus, we are able to turn it on/off separately. > > Wait, why do we even have a knob for TSO? > > We specifically decided not to have a knob for checksum offloading, > because it should just work out of the box, and if it doesn't, then > it should be disabled by the driver. It should not be the admin's > task to figure out if the implementation is broken and to fiddle > with the knobs (hi, FreeBSD!). > > I would assume that line of thinking extends to TSO. You are right. This is reflected in the current state of the diff below. We just need a knob for TCP Large Receive Offload (LRO) because it changes the TCP segments. You may want to avoid this on a forwarding router. ok? Thanks, Jan Index: sbin/ifconfig/ifconfig.8 === RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v retrieving revision 1.394 diff -u -p -r1.394 ifconfig.8 --- sbin/ifconfig/ifconfig.826 Apr 2023 02:38:08 - 1.394 +++ sbin/ifconfig/ifconfig.812 May 2023 06:22:35 - @@ -282,8 +282,18 @@ tag. As CSUM_TCPv4, but supports IPv6 datagrams. .It Sy CSUM_UDPv6 As above, for UDP. -.It Sy TSO -The device supports TCP segment offloading (TSO). +.It Sy LRO +The device supports TCP large receive offload (LRO). +.It Sy TSOv4 +The device supports IPv4 TCP segmentation offload (TSO). +TSO is used by default. +Use the +.Xr sysctl 8 +variable +.Va net.inet.tcp.tso +to disable this feature. +.It Sy TSOv6 +As above, for IPv6. .It Sy WOL The device supports Wake on LAN (WoL). .It Sy hardmtu @@ -491,25 +501,25 @@ Query and display information and diagno modules installed in an interface. It is only supported by drivers implementing the necessary functionality on hardware which supports it. 
-.It Cm tso -Enable TCP segmentation offloading (TSO) if it's supported by the hardware; see +.It Cm tcprecvoffload +Enable TCP large receive offload (LRO) if it's supported by the hardware; see .Cm hwfeatures . -TSO enabled NICs modify received TCP/IP packets. +LRO enabled network interfaces modify received TCP/IP packets. This will also affect traffic of upper layer interfaces, such as .Xr vlan 4 , .Xr aggr 4 , and .Xr carp 4 . -It is not possible to use TSO with interfaces attached to a +It is not possible to use LRO with interfaces attached to a .Xr bridge 4 , .Xr veb 4 , or .Xr tpmr 4 . Changing this option will re-initialize the network interface. -.It Cm -tso -Disable TSO. -TSO is disabled by default. +.It Cm -tcprecvoffload +Disable LRO. +LRO is disabled by default. .It Cm up Mark an interface .Dq up . Index: sbin/ifconfig/ifconfig.c === RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.462 diff -u -p -r1.462 ifconfig.c --- sbin/ifconfig/ifconfig.c8 Mar 2023 04:43:06 - 1.462 +++ sbin/ifconfig/ifconfig.c11 May 2023 17:33:55 - @@ -126,7 +126,7 @@ #define HWFEATURESBITS \ "\024\1CSUM_IPv4\2CSUM_TCPv4\3CSUM_UDPv4" \ "\5VLAN_MTU\6VLAN_HWTAGGING\10CSUM_TCPv6" \ - "\11CSUM_UDPv6\17TSO\20WOL" + "\11CSUM_UDPv6\15LRO\16TSOv4\17TSOv6\20WOL" struct ifencap { unsigned int ife_flags; @@ -469,8 +469,8 @@ const structcmd { { "-soii", IFXF_INET6_NOSOII, 0, setifxflags }, { "monitor",IFXF_MONITOR, 0, setifxflags }, { "-monitor", -IFXF_MONITOR, 0, setifxflags }, - { "tso",IFXF_TSO, 0, setifxflags }, - { "-tso", -IFXF_TSO, 0, setifxflags }, + { "tcprecvoffload", IFXF_LRO, 0, setifxflags }, + { "-tcprecvoffload", -IFXF_LRO, 0, setifxflags }, #ifndef SMALL { "hwfeatures", NEXTARG0, 0, printifhwfeatures }, { "metric", NEXTARG,0, setifmetric }, @@ -674,7 +674,7 @@ const structcmd { "\7RUNNING\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX"\ "\15LINK0\16LINK1\17LINK2\20MULTICAST" \ "\23AUTOCONF6TEMP\24MPLS\25WOL\26AUTOCONF6\27INET6_NOSOII" \ - "\30AUTOCONF4" 
"\31MONITOR" "\32TSO" + "\30AUTOCONF4" "\31MONITOR" "\32LRO" intgetinfo(struct ifreq *, int); void getsock(int); Index:
Re: ifconfig: SIOCSIFFLAGS: device not configured
On Thu, May 11, 2023 at 09:17:37PM +0200, Hrvoje Popovski wrote: > is it possible to change "ifconfig: SIOCSIFFLAGS: device not configured" > message that it has an interface name in it, something like: > ifconfig pfsync0: SIOCSIFFLAGS: device not configured <- in my case. > > I have many vlans and static routes in my setup and while testing some > diffs, it took me a long time to figure out which interface the message > was coming from. > > starting network > add host 10.11.2.69: gateway 10.12.253.225 > add host 10.250.184.36: gateway 10.12.253.225 > add host 9.9.9.9: gateway 10.12.253.225 > add host 10.11.1.234: gateway 10.12.253.225 > add host 10.11.1.235: gateway 10.12.253.225 > add host 10.11.255.123: gateway 10.12.253.225 > add net 10.101/16: gateway 10.12.253.225 > ifconfig: SIOCSIFFLAGS: Device not configured > add net 16/8: gateway 192.168.100.112 > add net a192:a168:a100:a100::/64: gateway 192:168:1000:1000::112 > add net 48/8: gateway 192.168.111.112 > add net a192:a168:a111:a111::/64: gateway 192:168::::112 > reordering: ld.so libc libcrypto sshd. > > or when I'm doing sh /etc/netstart and have aggr interface > > ifconfig: SIOCSTRUNKPORT: Device busy > ifconfig: SIOCSTRUNKPORT: Device busy > > to change > ifconfig ix0: SIOCSTRUNKPORT: Device busy > ifconfig ix1: SIOCSTRUNKPORT: Device busy I also run into this issue sometimes. So, here is diff that prints the interface name in front of most of these anonym error messages. ok? 
Jan Index: ifconfig.c === RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.462 diff -u -p -r1.462 ifconfig.c --- ifconfig.c 8 Mar 2023 04:43:06 - 1.462 +++ ifconfig.c 12 May 2023 14:14:01 - @@ -1070,14 +1070,14 @@ printgroup(char *groupname, int ifaliase errno == ENOENT) return (-1); else - err(1, "SIOCGIFGMEMB"); + err(1, "%s: SIOCGIFGMEMB", ifgr.ifgr_name); } len = ifgr.ifgr_len; if ((ifgr.ifgr_groups = calloc(1, len)) == NULL) err(1, "printgroup"); if (ioctl(sock, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) - err(1, "SIOCGIFGMEMB"); + err(1, "%s: SIOCGIFGMEMB", ifgr.ifgr_name); for (ifg = ifgr.ifgr_groups; ifg && len >= sizeof(struct ifg_req); ifg++) { @@ -1099,7 +1099,7 @@ printgroupattribs(char *groupname) bzero(&ifgr, sizeof(ifgr)); strlcpy(ifgr.ifgr_name, groupname, sizeof(ifgr.ifgr_name)); if (ioctl(sock, SIOCGIFGATTR, (caddr_t)&ifgr) == -1) - err(1, "SIOCGIFGATTR"); + err(1, "%s: SIOCGIFGATTR", ifgr.ifgr_name); printf("%s:", groupname); printf(" carp demote count %d", ifgr.ifgr_attrib.ifg_carp_demoted); @@ -1122,7 +1122,8 @@ setgroupattribs(char *groupname, int arg if (argc > 1) { neg = strtonum(argv[1], 0, 128, &errstr); if (errstr) - errx(1, "invalid carp demotion: %s", errstr); + errx(1, "%s: invalid carp demotion: %s", ifgr.ifgr_name, + errstr); } if (p[0] == '-') { @@ -1135,7 +1136,7 @@ setgroupattribs(char *groupname, int arg usage(); if (ioctl(sock, SIOCSIFGATTR, (caddr_t)&ifgr) == -1) - err(1, "SIOCSIFGATTR"); + err(1, "%s: SIOCSIFGATTR", ifgr.ifgr_name); } void @@ -1249,7 +1250,7 @@ clone_create(const char *addr, int param (void) strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); if (ioctl(sock, SIOCIFCREATE, &ifr) == -1) - err(1, "SIOCIFCREATE"); + err(1, "%s: SIOCIFCREATE", ifr.ifr_name); } void @@ -1258,7 +1259,7 @@ clone_destroy(const char *addr, int para (void) strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); if (ioctl(sock, SIOCIFDESTROY, &ifr) == -1) - err(1, "SIOCIFDESTROY"); + err(1, "%s: SIOCIFDESTROY", ifr.ifr_name); } 
struct if_clonereq * @@ -1422,7 +1423,7 @@ setifflags(const char *vname, int value) bcopy((char *)&ifr, (char *)&my_ifr, sizeof(struct ifreq)); if (ioctl(sock, SIOCGIFFLAGS, (caddr_t)&my_ifr) == -1) - err(1, "SIOCGIFFLAGS"); + err(1, "%s: SIOCGIFFLAGS", my_ifr.ifr_name); (void) strlcpy(my_ifr.ifr_name, ifname, sizeof(my_ifr.ifr_name)); flags = my_ifr.ifr_flags; @@ -1433,7 +1434,7 @@ setifflags(const char *vname, int value) flags |= value; my_ifr.ifr_flags = flags; if (ioctl(sock, SIOCSIFFLAGS, (caddr_t)&my_ifr) == -1) - err(1, "SIOCSIFFLAGS"); + err(1, "%s: SIOCSIFFLAGS", my_ifr.ifr_name); } void @@ -1444,7 +1445,7 @@ setifxflags(const char *vname, int value bcopy((char *)&ifr, (char *)&my_ifr, sizeof(struct ifreq)); if (io
Re: separate LRO/TSO flags
On Wed, May 10, 2023 at 11:13:04AM -0600, Todd C. Miller wrote: > On Wed, 10 May 2023 19:03:58 +0200, Jan Klemkow wrote: > > This diff introduces separate flags for TCP offloading. We split this > > into LRO (large receive offloading) and TSO (TCP segmentation > > offloading). Thus, we are able to turn it on/off separately. > > > > For ifconfig(8) we use "tcprecvoffload" and "tcpsendoffload". So, the > > user has a better insight of what this features are doing. > > Is it possible to control these at the address family level? In > other words, is it possible to enable "tcprecvoffload" and > "tcpsendoffload" for inet but not inet6 or vice versa? For tcprecvoffload and ix(4) it's not possible to enable/disable it per address family. It's just one flag for the hardware. For tcpsendoffload it's possible, but I won't do that until it's necessary. Why would you want to differentiate the address families here? bye, Jan
separate LRO/TSO flags
Hi, This diff introduces separate flags for TCP offloading. We split this into LRO (large receive offloading) and TSO (TCP segmentation offloading). Thus, we are able to turn it on/off separately. For ifconfig(8) we use "tcprecvoffload" and "tcpsendoffload". So, the user has a better insight of what this features are doing. ok? bye, Jan Index: sbin/ifconfig/ifconfig.8 === RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v retrieving revision 1.394 diff -u -p -r1.394 ifconfig.8 --- sbin/ifconfig/ifconfig.826 Apr 2023 02:38:08 - 1.394 +++ sbin/ifconfig/ifconfig.810 May 2023 16:22:30 - @@ -282,8 +282,10 @@ tag. As CSUM_TCPv4, but supports IPv6 datagrams. .It Sy CSUM_UDPv6 As above, for UDP. +.It Sy LRO +The device supports TCP large receive offloading (LRO). .It Sy TSO -The device supports TCP segment offloading (TSO). +The device supports TCP segmentation offloading (TSO). .It Sy WOL The device supports Wake on LAN (WoL). .It Sy hardmtu @@ -491,10 +493,30 @@ Query and display information and diagno modules installed in an interface. It is only supported by drivers implementing the necessary functionality on hardware which supports it. -.It Cm tso +.It Cm tcprecvoffload +Enable TCP large receive offloading (LRO) if it's supported by the hardware; see +.Cm hwfeatures . +LRO enabled network interfaces modify received TCP/IP packets. +This will also affect traffic of upper layer interfaces, +such as +.Xr vlan 4 , +.Xr aggr 4 , +and +.Xr carp 4 . +It is not possible to use LRO with interfaces attached to a +.Xr bridge 4 , +.Xr veb 4 , +or +.Xr tpmr 4 . +Changing this option will re-initialize the network interface. +.It Cm -tcprecvoffload +Disable LRO. +LRO is disabled by default. +.It Cm tcpsendoffload Enable TCP segmentation offloading (TSO) if it's supported by the hardware; see .Cm hwfeatures . -TSO enabled NICs modify received TCP/IP packets. +TSO enabled network interfaces are able to split large TCP segments into smaller +peaces that fits into MTU and MSS. 
This will also affect traffic of upper layer interfaces, such as .Xr vlan 4 , @@ -506,8 +528,7 @@ It is not possible to use TSO with inter .Xr veb 4 , or .Xr tpmr 4 . -Changing this option will re-initialize the network interface. -.It Cm -tso +.It Cm -tcpsendoffload Disable TSO. TSO is disabled by default. .It Cm up Index: sbin/ifconfig/ifconfig.c === RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.462 diff -u -p -r1.462 ifconfig.c --- sbin/ifconfig/ifconfig.c8 Mar 2023 04:43:06 - 1.462 +++ sbin/ifconfig/ifconfig.c10 May 2023 15:40:17 - @@ -126,7 +126,7 @@ #define HWFEATURESBITS \ "\024\1CSUM_IPv4\2CSUM_TCPv4\3CSUM_UDPv4" \ "\5VLAN_MTU\6VLAN_HWTAGGING\10CSUM_TCPv6" \ - "\11CSUM_UDPv6\17TSO\20WOL" + "\11CSUM_UDPv6\16LRO\17TSO\20WOL" struct ifencap { unsigned int ife_flags; @@ -469,8 +469,10 @@ const struct cmd { { "-soii", IFXF_INET6_NOSOII, 0, setifxflags }, { "monitor",IFXF_MONITOR, 0, setifxflags }, { "-monitor", -IFXF_MONITOR, 0, setifxflags }, - { "tso",IFXF_TSO, 0, setifxflags }, - { "-tso", -IFXF_TSO, 0, setifxflags }, + { "tcprecvoffload", IFXF_LRO, 0, setifxflags }, + { "-tcprecvoffload", -IFXF_LRO, 0, setifxflags }, + { "tcpsendoffload", IFXF_TSO, 0, setifxflags }, + { "-tcpsendoffload", -IFXF_TSO, 0, setifxflags }, #ifndef SMALL { "hwfeatures", NEXTARG0, 0, printifhwfeatures }, { "metric", NEXTARG,0, setifmetric }, @@ -674,7 +676,7 @@ const structcmd { "\7RUNNING\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX"\ "\15LINK0\16LINK1\17LINK2\20MULTICAST" \ "\23AUTOCONF6TEMP\24MPLS\25WOL\26AUTOCONF6\27INET6_NOSOII" \ - "\30AUTOCONF4" "\31MONITOR" "\32TSO" + "\30AUTOCONF4" "\31MONITOR" "\32LRO" "\33TSO" intgetinfo(struct ifreq *, int); void getsock(int); Index: sys/dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.193 diff -u -p -r1.193 if_ix.c --- sys/dev/pci/if_ix.c 28 Apr 2023 10:18:57 - 1.193 +++ sys/dev/pci/if_ix.c 10 May 2023 16:32:44 - @@ -1925,7 +1925,7 @@ ixgbe_setup_interface(struct ix_softc *s 
ifp->if_capabilities |= IFCAP_CSUM_IPv4; if (sc->hw.mac.type != ixgbe_mac_82598EB) - ifp->if_capabilities |= IFCAP_TSO; + ifp->if_capabilities |= IFCAP_LRO; /* * Sp
Re: software tcp send offloading
On Tue, May 09, 2023 at 09:56:36AM +0200, Alexander Bluhm wrote: > On Sun, May 07, 2023 at 09:00:31PM +0200, Alexander Bluhm wrote: > > Not sure if I addressed all corner cases already. I think IPsec > > is missing. > > Updated diff: > - parts have been commited > - works with IPsec now Thanks for this solution. Looks much better to me than an IPsec lookup in tcp_output() as it's done in FreeBSD. > - some bugs fixed > - sysctl net.inet.tcp.tso > - netstat TSO counter > > If you test this, recompile sysctl and netstat with new kernel > headers. Then you can see, whether the diff has an effect on your > setup. > > # netstat -s -p tcp | grep TSO > 79 output TSO packets software chopped > 0 output TSO packets hardware processed > 840 output TSO packets generated > 0 output TSO packets dropped Good idea. > If you run into problems, disable the feature, and report if the > problem goes away. This helps to locate the bug. > > # sysctl net.inet.tcp.tso=0 > net.inet.tcp.tso: 1 -> 0 > > I would like to keep the sysctl for now. It makes performance > comparison easier. When we add hardware TSO it can be a quick > workaround for driver problems. > > When this has been tested a bit, I think it is ready for commit. > Remaining issues can be handled in tree. My tests pass, I am not > aware of TCP problems. I also did some testing in my setups. Everything works. > ok? Diff looks fine to me, too. 
ok jan@ > bluhm > > Index: sys/net/pf.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v > retrieving revision 1.1177 > diff -u -p -r1.1177 pf.c > --- sys/net/pf.c 8 May 2023 13:22:13 - 1.1177 > +++ sys/net/pf.c 8 May 2023 22:37:04 - > @@ -6561,6 +6561,16 @@ pf_route(struct pf_pdesc *pd, struct pf_ > goto done; > } > > + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && > + m0->m_pkthdr.ph_mss <= ifp->if_mtu) { > + if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || > + if_output_ml(ifp, &ml, sintosa(dst), rt)) > + goto done; > + tcpstat_inc(tcps_outswtso); > + goto done; > + } > + CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); > + > /* >* Too large for interface; fragment if possible. >* Must be able to put at least 8 bytes per fragment. > @@ -6594,6 +6604,7 @@ void > pf_route6(struct pf_pdesc *pd, struct pf_state *st) > { > struct mbuf *m0; > + struct mbuf_list ml; > struct sockaddr_in6 *dst, sin6; > struct rtentry *rt = NULL; > struct ip6_hdr *ip6; > @@ -6685,11 +6696,21 @@ pf_route6(struct pf_pdesc *pd, struct pf > goto done; > } > > - if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { > + if (m0->m_pkthdr.len <= ifp->if_mtu) { > in6_proto_cksum_out(m0, ifp); > ifp->if_output(ifp, m0, sin6tosa(dst), rt); > goto done; > } > + > + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && > + m0->m_pkthdr.ph_mss <= ifp->if_mtu) { > + if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || > + if_output_ml(ifp, &ml, sin6tosa(dst), rt)) > + goto done; > + tcpstat_inc(tcps_outswtso); > + goto done; > + } > + CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); > > ip6stat_inc(ip6s_cantfrag); > if (st->rt != PF_DUPTO) > Index: sys/netinet/in.h > === > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.h,v > retrieving revision 1.142 > diff -u -p -r1.142 in.h > --- sys/netinet/in.h 11 Apr 2023 00:45:09 - 1.142 > +++ sys/netinet/in.h 8 May 2023 13:47:48 - > @@ -780,6 +780,7 @@ int in_canforward(struct in_addr); > int in_cksum(struct mbuf *, int); > int in4_cksum(struct mbuf *, 
u_int8_t, int, int); > voidin_proto_cksum_out(struct mbuf *, struct ifnet *); > +int in_ifcap_cksum(struct mbuf *, struct ifnet *, int); > voidin_ifdetach(struct ifnet *); > int in_mask2len(struct in_addr *); > voidin_len2mask(struct in_addr *, int); > Index: sys/netinet/ip_output.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v > retrieving revision 1.384 > diff -u -p -r1.384 ip_output.c > --- sys/netinet/ip_output.c 8 May 2023 13:22:13 - 1.384 > +++ sys/netinet/ip_output.c 8 May 2023 22:37:04 - > @@ -84,7 +84,6 @@ void ip_mloopback(struct ifnet *, struct > static __inline u_int16_t __attribute__((__unused__)) > in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); > void in_delayed_cksum(struct mbuf *); > -int in_ifcap_cksum(struct mbuf *, struct ifnet *, int); > > int ip_output_ipsec_looku
Re: em(4) multiqueue
On Fri, Apr 14, 2023 at 10:26:14AM +0800, Kevin Lo wrote: > On Thu, Apr 13, 2023 at 01:30:36PM -0500, Brian Conway wrote: > > Reviving this thread, apologies for discontinuity in mail readers: > > https://marc.info/?t=16564219358 > > > > After rebasing on 7.3, my results have mirrored Hrvoje's testing at > > the end of that thread. No issues with throughput, unusual latency, > > or reliability. `vmstat -i` shows some level of balancing between > > the queues. I've been testing on as many em(4) systems as I have > > access to, some manually, some in a packet forwarder/firewall > > scenarios: > > Last time I tested (about a year go) on I211, rx locked up if I tried > something > like iperf3 or tcpbench. Don't know if you have a similar problem. I rebased the rest to current and tested it with tcpbench between the following interfaces: em0 at pci7 dev 0 function 0 "Intel 82580" rev 0x01, msix, 4 queues, address 90:e2:ba:df:d5:2c em0 at pci5 dev 0 function 0 "Intel I350" rev 0x01, msix, 8 queues, address 00:25:90:eb:b3:c2 After a second the connection stucked. As far as I can see, the sending side got a problem. ot45# tcpbench 192.168.99.3 elapsed_ms bytes mbps bwidth 1012 14574120 115.210 100.00% Conn: 1 Mbps: 115.210 Peak Mbps: 115.210 Avg Mbps: 115.210 2022 00.000-nan% ... ot46# tcpbench -s elapsed_ms bytes mbps bwidth 1017 14313480 112.594 100.00% Conn: 1 Mbps: 112.594 Peak Mbps: 112.594 Avg Mbps: 112.594 2027 00.000-nan% ... ot45# netstat -nf inet -p tcp Active Internet connections Proto Recv-Q Send-Q Local Address Foreign AddressTCP-State tcp 0 260640 192.168.99.1.18530 192.168.99.3.12345 CLOSING When I retried it, it sometimes work and most times not. 
kstat tells me, that transmit queues 1 to 3 are oactive and just 0 works: em0:0:txq:0 packets: 4042648 packets bytes: 5310138322 bytes qdrops: 9 packets errors: 0 packets qlen: 0 packets maxqlen: 511 packets oactive: false em0:0:txq:1 packets: 9812 packets bytes: 14846716 bytes qdrops: 0 packets errors: 0 packets qlen: 184 packets maxqlen: 511 packets oactive: true em0:0:txq:2 packets: 690362 packets bytes: 60011484 bytes qdrops: 0 packets errors: 0 packets qlen: 185 packets maxqlen: 511 packets oactive: true em0:0:txq:3 packets: 443181 packets bytes: 43829886 bytes qdrops: 0 packets errors: 0 packets qlen: 198 packets maxqlen: 511 packets oactive: true This is the rebased diff on current i tested: Index: dev/pci/files.pci === RCS file: /cvs/src/sys/dev/pci/files.pci,v retrieving revision 1.361 diff -u -p -r1.361 files.pci --- dev/pci/files.pci 23 Apr 2023 00:20:26 - 1.361 +++ dev/pci/files.pci 25 Apr 2023 11:25:47 - @@ -334,7 +334,7 @@ attach fxp at pci with fxp_pci file dev/pci/if_fxp_pci.cfxp_pci # Intel Pro/1000 -device em: ether, ifnet, ifmedia +device em: ether, ifnet, ifmedia, intrmap, stoeplitz attach em at pci file dev/pci/if_em.c em file dev/pci/if_em_hw.c em Index: dev/pci/if_em.c === RCS file: /cvs/src/sys/dev/pci/if_em.c,v retrieving revision 1.365 diff -u -p -r1.365 if_em.c --- dev/pci/if_em.c 9 Feb 2023 21:21:27 - 1.365 +++ dev/pci/if_em.c 25 Apr 2023 11:25:47 - @@ -247,6 +247,7 @@ int em_intr(void *); int em_allocate_legacy(struct em_softc *); void em_start(struct ifqueue *); int em_ioctl(struct ifnet *, u_long, caddr_t); +int em_rxrinfo(struct em_softc *, struct if_rxrinfo *); void em_watchdog(struct ifnet *); void em_init(void *); void em_stop(void *, int); @@ -309,8 +310,10 @@ int em_setup_queues_msix(struct em_soft int em_queue_intr_msix(void *); int em_link_intr_msix(void *); void em_enable_queue_intr_msix(struct em_queue *); +void em_setup_rss(struct em_softc *); #else #define em_allocate_msix(_sc) (-1) +#define em_setup_rss(_sc) 0 #endif #if 
NKSTAT > 0 @@ -333,7 +336,6 @@ struct cfdriver em_cd = { }; static int em_smart_pwr_down = FALSE; -int em_enable_msix = 0; /* * Device identification routine @@ -629,12 +631,12 @@ err_pci: void em_start(struct ifqueue *ifq) { + struct em_queue *que = ifq->ifq_softc; struct ifnet *ifp = ifq->ifq_if; struct em_softc *sc = ifp->if_softc; u_int head, free, used; struct mbuf *m; int post = 0; - struct em_que
Re: libcrypto: Fix EINVAL in openssl/tls_init
On Fri, Mar 24, 2023 at 10:02:05PM +0100, Theo Buehler wrote: > > Thus, I would suggest to set this constant to ELAST. So, we will avoid > > useless unknown error strings and a non-zero errno after tls_init(). > > ELAST isn't portable. It's under __BSD_VISIBLE in sys/errno.h. > > It would seem better to use the save_errno idiom to store the errno > at the start of the loop and restore it at the end. > > And yes, we should fix this, after unluck. ok? Thanks, Jan Index: err/err.c === RCS file: /cvs/src/lib/libcrypto/err/err.c,v retrieving revision 1.50 diff -u -p -r1.50 err.c --- err/err.c 26 Dec 2022 07:18:52 - 1.50 +++ err/err.c 27 Mar 2023 07:58:25 - @@ -580,6 +580,7 @@ build_SYS_str_reasons(void) static char strerror_tab[NUM_SYS_STR_REASONS][LEN_SYS_STR_REASON]; int i; static int init = 1; + int save_errno = errno; CRYPTO_r_lock(CRYPTO_LOCK_ERR); if (!init) { @@ -594,6 +595,8 @@ build_SYS_str_reasons(void) return; } + /* strerror(3) will set errno to EINVAL when i is an unknown error. */ + save_errno = errno; for (i = 1; i <= NUM_SYS_STR_REASONS; i++) { ERR_STRING_DATA *str = &SYS_str_reasons[i - 1]; @@ -610,6 +613,7 @@ build_SYS_str_reasons(void) if (str->string == NULL) str->string = "unknown"; } + errno = save_errno; /* Now we still have SYS_str_reasons[NUM_SYS_STR_REASONS] = {0, NULL}, * as required by ERR_load_strings. */
libcrypto: Fix EINVAL in openssl/tls_init
Hi, after tls_init() and OPENSSL_init_ssl() errno is always set to EINVAL. This is caused by a routine that tries to prefetch all error strings up to 127 from strerror(3). But, strerror(3) sets EINVAL for unknown values of error. Thus, I would suggest to set this constant to ELAST. So, we will avoid useless unknown error strings and a non-zero errno after tls_init(). I guess this is not serious enough for the current release. But, we might fix this after unlocking of the tree? ok? bye, Jan Index: lib/libcrypto//err/err.c === RCS file: /cvs/src/lib/libcrypto/err/err.c,v retrieving revision 1.50 diff -u -p -r1.50 err.c --- lib/libcrypto//err/err.c26 Dec 2022 07:18:52 - 1.50 +++ lib/libcrypto//err/err.c24 Mar 2023 20:07:18 - @@ -560,7 +560,7 @@ int_err_get_next_lib(void) #ifndef OPENSSL_NO_ERR -#define NUM_SYS_STR_REASONS 127 +#define NUM_SYS_STR_REASONS ELAST #define LEN_SYS_STR_REASON 32 static ERR_STRING_DATA SYS_str_reasons[NUM_SYS_STR_REASONS + 1];
Re: refactor mbuf parsing on driver level
On Mon, Feb 06, 2023 at 09:47:57PM +0100, Christian Weisgerber wrote: > Christian Weisgerber: > > > I also switched over em(4) to this and have successfully used it > > for a full 30-hour package build on the four amd64 ports machines > > with their I350 interfaces. Additionally, I've done some IPv6 > > testing at home over an I210. > > ok for this? I tested it with I350. Diff look fine. ok jan@ > igc(4) has very similar code, but I don't have access to a machine > with that hardware. Send me an ssh-key and I give you access to this machine: http://obsd-lab.genua.de/hw/ot34.html Thanks, Jan > > diff f8646d27d4041e5f595c04e17a876f12600deea7 > > f3f95d0cc0957a2f1e961cace4c3c9dd869e8c9e > > commit - f8646d27d4041e5f595c04e17a876f12600deea7 > > commit + f3f95d0cc0957a2f1e961cace4c3c9dd869e8c9e > > blob - c840377f0a3f1ef3c3e3072657698d8085ffd3a0 > > blob + 523ed5b0a18718c50bb30e2995d293fa1d2199a6 > > --- sys/dev/pci/if_em.c > > +++ sys/dev/pci/if_em.c > > @@ -2398,12 +2398,11 @@ em_tx_ctx_setup(struct em_queue *que, struct mbuf > > *mp, > > em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head, > > u_int32_t *olinfo_status, u_int32_t *cmd_type_len) > > { > > + struct ether_extracted ext; > > struct e1000_adv_tx_context_desc *TD; > > - struct ether_header *eh = mtod(mp, struct ether_header *); > > - struct mbuf *m; > > uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; > > - int off = 0, hoff; > > - uint8_t ipproto, iphlen; > > + int off = 0; > > + uint8_t iphlen; > > > > *olinfo_status = 0; > > *cmd_type_len = 0; > > @@ -2418,44 +2417,26 @@ em_tx_ctx_setup(struct em_queue *que, struct mbuf > > *mp, > > } > > #endif > > > > - vlan_macip_lens |= (sizeof(*eh) << E1000_ADVTXD_MACLEN_SHIFT); > > - > > - switch (ntohs(eh->ether_type)) { > > - case ETHERTYPE_IP: { > > - struct ip *ip; > > + ether_extract_headers(mp, &ext); > > > > - m = m_getptr(mp, sizeof(*eh), &hoff); > > - ip = (struct ip *)(mtod(m, caddr_t) + hoff); > > + vlan_macip_lens |= 
(sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT); > > > > - iphlen = ip->ip_hl << 2; > > - ipproto = ip->ip_p; > > + if (ext.ip4) { > > + iphlen = ext.ip4->ip_hl << 2; > > > > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; > > if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) { > > *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; > > off = 1; > > } > > - > > - break; > > - } > > #ifdef INET6 > > - case ETHERTYPE_IPV6: { > > - struct ip6_hdr *ip6; > > + } else if (ext.ip6) { > > + iphlen = sizeof(*ext.ip6); > > > > - m = m_getptr(mp, sizeof(*eh), &hoff); > > - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); > > - > > - iphlen = sizeof(*ip6); > > - ipproto = ip6->ip6_nxt; > > - > > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; > > - break; > > - } > > #endif > > - default: > > + } else { > > iphlen = 0; > > - ipproto = 0; > > - break; > > } > > > > *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS; > > @@ -2464,21 +2445,18 @@ em_tx_ctx_setup(struct em_queue *que, struct mbuf > > *mp, > > vlan_macip_lens |= iphlen; > > type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; > > > > - switch (ipproto) { > > - case IPPROTO_TCP: > > + if (ext.tcp) { > > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; > > if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) { > > *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; > > off = 1; > > } > > - break; > > - case IPPROTO_UDP: > > + } else if (ext.udp) { > > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; > > if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) { > > *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; > > off = 1; > > } > > - break; > > } > > > > if (!off) > > > > -- > Christian "naddy" Weisgerber na...@mips.inka.de >
Re: refactor mbuf parsing on driver level
On Tue, Jan 31, 2023 at 09:12:51PM +0100, Christian Weisgerber wrote: > Jan Klemkow: > > > - I turned the KASSERTS to returns. > > - Check if the mbuf is large enough for an ether header. > > - additionally #ifdef'd INET6 around the ip6_hdr in the new struct > > For non-initial fragments of TCP/UDP packets, ether_extract_headers() > will create ext.tcp/ext.udp pointers that do not point to a protocol > header. Should there be a check to exclude fragments? yes. bluhm also suggested this solution to me. ok? Thanks, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.191 diff -u -p -r1.191 if_ix.c --- dev/pci/if_ix.c 26 Jan 2023 07:32:39 - 1.191 +++ dev/pci/if_ix.c 31 Jan 2023 21:05:40 - @@ -2477,25 +2477,16 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_extracted ext; int offload = 0; uint32_t iphlen; - uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + ether_extract_headers(mp, &ext); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; + *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT); - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); - - iphlen = ip->ip_hl << 2; - ipproto = ip->ip_p; + if (ext.ip4) { + iphlen = ext.ip4->ip_hl << 2; if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; @@ -2503,46 +2494,30 @@ ixgbe_csum_offload(struct mbuf *mp, uint } *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - 
- iphlen = sizeof(*ip6); - ipproto = ip6->ip6_nxt; + } else if (ext.ip6) { + iphlen = sizeof(*ext.ip6); *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; - } #endif - - default: + } else { return offload; } *vlan_macip_lens |= iphlen; - switch (ipproto) { - case IPPROTO_TCP: + if (ext.tcp) { *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; offload = 1; } - break; - case IPPROTO_UDP: + } else if (ext.udp) { *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; offload = 1; } - break; } return offload; Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.86 diff -u -p -r1.86 if_ixl.c --- dev/pci/if_ixl.c26 Jan 2023 07:32:39 - 1.86 +++ dev/pci/if_ixl.c31 Jan 2023 21:05:40 - @@ -2784,10 +2784,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm static uint64_t ixl_tx_setup_offload(struct mbuf *m0) { - struct mbuf *m; - int hoff; + struct ether_extracted ext; uint64_t hlen; - uint8_t ipproto; uint64_t offload = 0; if (ISSET(m0->m_flags, M_VLANTAG)) { @@ -2800,39 +2798,21 @@ ixl_tx_setup_offload(struct mbuf *m0) M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) return (offload); - switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(m0, ETHER_HDR_LEN, &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + ether_extract_headers(m0, &ext); + if (ext.ip4) { offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ? I
Re: refactor mbuf parsing on driver level
On Fri, Jan 27, 2023 at 04:44:36PM +0100, Christian Weisgerber wrote: > > The ether_extract_headers() diff was reverted, because is wrong for the > > cases other than tcp/udp/icmp. We need to fix it and recommit again > > before continue. > > I think (TCP or) UDP fragments are the problem. Fragments don't have > the protocol header but will still end up here: > > case IPPROTO_UDP: > m = m_getptr(m, hoff + hlen, &hoff); > KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ext->udp)); > ext->udp = (struct udphdr *)(mtod(m, caddr_t) + hoff); > break; > > If a tail fragment is too short, it will trigger the KASSERT(). > > Previously, this wasn't a problem, because if there was such a > KASSERT() as in ixl(4), it was behind a M_*_CSUM_OUT check, and we > never set those flags for fragments. I changed the diff below to be more robust and reconstruct my test equipment to build permanently over NFS. - I turned the KASSERTS to returns. - Check if the mbuf is large enough for an ether header. - additionally #ifdef'd INET6 around the ip6_hdr in the new struct Tested the diff on NFS client and server with several kernel builds. ok? 
Thanks, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.191 diff -u -p -r1.191 if_ix.c --- dev/pci/if_ix.c 26 Jan 2023 07:32:39 - 1.191 +++ dev/pci/if_ix.c 27 Jan 2023 13:37:13 - @@ -2477,25 +2477,16 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_extracted ext; int offload = 0; uint32_t iphlen; - uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + ether_extract_headers(mp, &ext); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; + *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT); - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); - - iphlen = ip->ip_hl << 2; - ipproto = ip->ip_p; + if (ext.ip4) { + iphlen = ext.ip4->ip_hl << 2; if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; @@ -2503,46 +2494,30 @@ ixgbe_csum_offload(struct mbuf *mp, uint } *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - - iphlen = sizeof(*ip6); - ipproto = ip6->ip6_nxt; + } else if (ext.ip6) { + iphlen = sizeof(*ext.ip6); *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; - } #endif - - default: + } else { return offload; } *vlan_macip_lens |= iphlen; - switch (ipproto) { - case IPPROTO_TCP: + if (ext.tcp) { *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; offload = 1; } - break; - case IPPROTO_UDP: + } else if (ext.udp) 
{ *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; offload = 1; } - break; } return offload; Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.86 diff -u -p -r1.86 if_ixl.c --- dev/pci/if_ixl.c26 Jan 2023 07:32:39 - 1.86 +++ dev/pci/if_ixl.c27 Jan 2023 13:37:13 - @@ -2784,10 +2784,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm static uint64_t ixl_tx_setup_offload(struct mbuf *m0) { - struct mbuf *m; - int hoff; + struct ether_extracted ext; uint64_t hlen; - uint8_t ipproto; uint64_t offload = 0; if (ISSET(m0->m_flags, M_VLANTAG)) { @@ -2800,39 +2798,21 @@ ixl_tx_setup_offload(struct mbuf *m0) M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
Re: refactor mbuf parsing on driver level
On Thu, Jan 26, 2023 at 02:06:28PM +0300, Vitaliy Makkoveev wrote: > On Thu, Jan 26, 2023 at 11:37:51AM +0100, Christian Weisgerber wrote: > > Jan Klemkow: > > > > > we have several drivers which have to parse the content of mbufs. This > > > diff suggest a central parsing function for this. Thus, we can reduce > > > redundant code. > > > > > > I just start with ix(4) and ixl(4) because it was easy to test for me. > > > But, this could also improve em(4), igc(4), ale(4) and oce(4). > > > > Here's the corresponding change for em(4). > > This only affects 82575, 82576, i350, and i210. > > Tested on i210. > > > > ok? > > > > The ether_extract_headers() diff was reverted, because is wrong for the > cases other than tcp/udp/icmp. We need to fix it and recommit again > before continue. I'm already working on a fix for this mess. I'll send a new diff soon. Sorry for the inconvenience, Jan
Re: refactor mbuf parsing on driver level
On Tue, Jan 24, 2023 at 05:40:55PM +0300, Vitaliy Makkoveev wrote: > On Tue, Jan 24, 2023 at 03:14:36PM +0100, Jan Klemkow wrote: > > On Tue, Jan 24, 2023 at 09:32:53PM +1000, David Gwynne wrote: > > > On Mon, Jan 23, 2023 at 09:25:34AM +0100, Jan Klemkow wrote: > > > > On Wed, Jan 18, 2023 at 03:49:25PM -0700, Theo de Raadt wrote: > > > > > Jan Klemkow wrote: > > > > > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote: > > > > > > > On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote: > > > > > > > > we have several drivers which have to parse the content of > > > > > > > > mbufs. This > > > > > > > > diff suggest a central parsing function for this. Thus, we can > > > > > > > > reduce > > > > > > > > redundant code. > > > > > > > > > > > > > > > > I just start with ix(4) and ixl(4) because it was easy to test > > > > > > > > for me. > > > > > > > > But, this could also improve em(4), igc(4), ale(4) and oce(4). > > > > > > > > > > > > > > > > I'm not sure about the name, the api nor the place of this > > > > > > > > code. So, if > > > > > > > > someone has a better idea: i'm open to anything. > > > > > > > > > > > > > > I like code this deduplication. > > > > > > > > > > > > > > This newly introduced function doesn't touch ifnet but only > > > > > > > extracts > > > > > > > protocol headers from mbuf(9). I guess mbuf_extract_headers() or > > > > > > > something like is much better for name with the ern/uipc_mbuf2.c > > > > > > > as > > > > > > > place. > > > > > > > > > > > > Good Point. Updates diff below. > > > > > > > > > > I agree, "extract" is a better name. dlg, do you have a comment? > > > > > > > > Whats you opinion about this diff? > > > > > > it makes ix and ixl prettier, so that's a good enough reason to do > > > it. it should go in net/if_ethersubr.c as ether_extract_headers() > > > though. > > > > > > could you try using a struct to carry the header pointers around and see > > > what that looks like? 
> > > > > > struct ether_extracted { > > > struct ether_header *eh; > > > struct ip *ip4; > > > struct ip6_hdr *ip6; > > > struct tcphdr *tcp; > > > struct udphdr *udp; > > > }; > > > > > > void ether_extract_headers(struct mbuf *, struct ether_extracted *); > > > > > > you can add a depth or flags argument if you want to be able to > > > tell it to return before looking for the tcp/udp headers if you > > > want. > > Looks better then m_extract_headers(). Since ext->eh is always assigned > to non NULL value below, the "ext->eh = NULL;" is not necessary. Also > I'm not sure, but is memset() more reliable for `ext' zeroing? Anyway, > feel free to commit without memset(). OK? Thanks, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ix.c --- dev/pci/if_ix.c 2 Sep 2022 14:08:09 - 1.189 +++ dev/pci/if_ix.c 24 Jan 2023 13:34:17 - @@ -2477,25 +2477,16 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_extracted ext; int offload = 0; uint32_t iphlen; - uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + ether_extract_headers(mp, &ext); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; + *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT); - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); - - iphlen = ip->ip_hl << 2; - ipproto = ip->ip_p; +
Re: refactor mbuf parsing on driver level
On Tue, Jan 24, 2023 at 09:32:53PM +1000, David Gwynne wrote: > On Mon, Jan 23, 2023 at 09:25:34AM +0100, Jan Klemkow wrote: > > On Wed, Jan 18, 2023 at 03:49:25PM -0700, Theo de Raadt wrote: > > > Jan Klemkow wrote: > > > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote: > > > > > On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote: > > > > > > we have several drivers which have to parse the content of mbufs. > > > > > > This > > > > > > diff suggest a central parsing function for this. Thus, we can > > > > > > reduce > > > > > > redundant code. > > > > > > > > > > > > I just start with ix(4) and ixl(4) because it was easy to test for > > > > > > me. > > > > > > But, this could also improve em(4), igc(4), ale(4) and oce(4). > > > > > > > > > > > > I'm not sure about the name, the api nor the place of this code. > > > > > > So, if > > > > > > someone has a better idea: i'm open to anything. > > > > > > > > > > I like code this deduplication. > > > > > > > > > > This newly introduced function doesn't touch ifnet but only extracts > > > > > protocol headers from mbuf(9). I guess mbuf_extract_headers() or > > > > > something like is much better for name with the ern/uipc_mbuf2.c as > > > > > place. > > > > > > > > Good Point. Updates diff below. > > > > > > I agree, "extract" is a better name. dlg, do you have a comment? > > > > Whats you opinion about this diff? > > it makes ix and ixl prettier, so that's a good enough reason to do > it. it should go in net/if_ethersubr.c as ether_extract_headers() > though. > > could you try using a struct to carry the header pointers around and see > what that looks like? 
> > struct ether_extracted { > struct ether_header *eh; > struct ip *ip4; > struct ip6_hdr *ip6; > struct tcphdr *tcp; > struct udphdr *udp; > }; > > void ether_extract_headers(struct mbuf *, struct ether_extracted *); > > you can add a depth or flags argument if you want to be able to > tell it to return before looking for the tcp/udp headers if you > want. OK? Thanks, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ix.c --- dev/pci/if_ix.c 2 Sep 2022 14:08:09 - 1.189 +++ dev/pci/if_ix.c 24 Jan 2023 13:34:17 - @@ -2477,25 +2477,16 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_extracted ext; int offload = 0; uint32_t iphlen; - uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + ether_extract_headers(mp, &ext); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; + *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT); - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); - - iphlen = ip->ip_hl << 2; - ipproto = ip->ip_p; + if (ext.ip4) { + iphlen = ext.ip4->ip_hl << 2; if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; @@ -2503,46 +2494,30 @@ ixgbe_csum_offload(struct mbuf *mp, uint } *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - - iphlen = sizeof(*ip6); - ipproto = ip6->ip6_nxt; + } else if (ext.ip6) { + iphlen = sizeof(*ext.ip6); *type_tucmd_mlhl |= 
IXGBE_ADVTXD_TUCMD_IPV6; - break; - } #endif - - default: + } else {
ifconfig(8): fix output of missing ipv6 addresses
Hi, ifconfig doesn't print ipv6 addresses if it is used with the media option. # ifconfig -A vio0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500 ... inet 10.0.1.65 netmask 0xffffff00 broadcast 10.0.1.255 inet6 fe80::5054:ff:fe6a:b6fd%vio0 prefixlen 64 scopeid 0x1 inet6 fc00:1::1 prefixlen 64 inet 192.168.0.1 netmask 0xffffff00 broadcast 192.168.0.255 # ifconfig -A media vio0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> mtu 1500 ... supported media: media autoselect inet 10.0.1.65 netmask 0xffffff00 broadcast 10.0.1.255 inet 192.168.0.1 netmask 0xffffff00 broadcast 192.168.0.255 As the diff below shows, afp is NULL by default, but set to inet if there is an additional program parameter. At the end, no specific address family is assumed if afp is NULL. Thus, the diff below introduces a new variable to remember if a specific address family was set by the user or not for printing all interface addresses. The regression test of ifconfig(8) is passing with the diff below. ok? bye, Jan Index: ifconfig.c === RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.461 diff -u -p -r1.461 ifconfig.c --- ifconfig.c 18 Jan 2023 21:57:10 - 1.461 +++ ifconfig.c 23 Jan 2023 12:11:30 - @@ -746,6 +746,7 @@ const struct afswtch { }; const struct afswtch *afp; /*the address family being set or asked about*/ +const struct afswtch *pafp;/*the address family being used for printing*/ char joinname[IEEE80211_NWID_LEN]; size_t joinlen; @@ -840,7 +841,7 @@ main(int argc, char *argv[]) if (argc > 0) { for (afp = rafp = afs; rafp->af_name; rafp++) if (strcmp(rafp->af_name, *argv) == 0) { - afp = rafp; + pafp = afp = rafp; argc--; argv++; break; @@ -1216,7 +1217,7 @@ printif(char *name, int ifaliases) (ifa->ifa_addr->sa_family == AF_INET && ifaliases == 0 && noinet == 0)) continue; - if ((p = afp) != NULL) { + if ((p = pafp) != NULL) { if (ifa->ifa_addr->sa_family == p->af_af) p->af_status(1); } else { @@ -3514,7 +3515,7 @@ status(int link, struct sockaddr_dl *sdl proto_status: if (link == 0) { - if ((p = afp) != NULL) { + if ((p = pafp) != NULL) { 
p->af_status(1); } else for (p = afs; p->af_name; p++) { ifr.ifr_addr.sa_family = p->af_af;
Re: refactor mbuf parsing on driver level
On Thu, Jan 19, 2023 at 02:55:29PM +0300, Vitaliy Makkoveev wrote: > On Thu, Jan 19, 2023 at 10:40:52AM +0100, Jan Klemkow wrote: > > On Thu, Jan 19, 2023 at 12:02:29PM +0300, Vitaliy Makkoveev wrote: > > > On Thu, Jan 19, 2023 at 01:55:57AM +0300, Vitaliy Makkoveev wrote: > > > > > On 19 Jan 2023, at 01:39, Jan Klemkow wrote: > > > > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote: > > > > >> On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote: > > > > >>> we have several drivers which have to parse the content of mbufs. > > > > >>> This > > > > >>> diff suggest a central parsing function for this. Thus, we can > > > > >>> reduce > > > > >>> redundant code. > > > > >>> > > > > >>> I just start with ix(4) and ixl(4) because it was easy to test for > > > > >>> me. > > > > >>> But, this could also improve em(4), igc(4), ale(4) and oce(4). > > > > >>> > > > > >>> I'm not sure about the name, the api nor the place of this code. > > > > >>> So, if > > > > >>> someone has a better idea: i'm open to anything. > > > > >> > > > > >> I like code this deduplication. > > > > >> > > > > >> This newly introduced function doesn't touch ifnet but only extracts > > > > >> protocol headers from mbuf(9). I guess mbuf_extract_headers() or > > > > >> something like is much better for name with the ern/uipc_mbuf2.c as > > > > >> place. > > > > > > > > > > Good Point. Updates diff below. > > > > > > > > > > + > > > > > +/* Parse different TCP/IP protocol headers for a quick view inside > > > > > an mbuf. */ > > > > > +void > > > > > +m_exract_headers(struct mbuf *mp, struct ether_header **eh, struct > > > > > ip **ip4, > > > > > +struct ip6_hdr **ip6, struct tcphdr **tcp, struct udphdr **udp) > > > > > + > > > > > > > > Should be m_extract_headers(). The rest of the diff looks good to me. > > > > > > Please wait. > > > > > > The mandatory nullification of `ip4', `ip6' and other variables passed > > > to m_exract_headers() is not obvious. 
It is much better to return > > > the integer result of extraction like m_tag_copy_chain() does. > > > > Yes, the mandatory nullification seems to be more errorprone. In my > > opinion is the number of results it not that useful. You have to check > > the retuned pointers anyway. > > > > I moved the nullification inside of m_exract_headers(). > > This is better. I also like the last return statement be removed from > m_extract_headers() before commit. Fixed below. Plus a suggestion from mpi to not pollute the namespace with all the headers in mbuf.h. Moved them to uipc_mbuf2.c. Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ix.c --- dev/pci/if_ix.c 2 Sep 2022 14:08:09 - 1.189 +++ dev/pci/if_ix.c 19 Jan 2023 09:29:10 - @@ -2477,23 +2477,18 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_header *eh; + struct ip *ip; + struct ip6_hdr *ip6; int offload = 0; uint32_t iphlen; uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + m_extract_headers(mp, &eh, &ip, &ip6, NULL, NULL); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + if (ip) { iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; @@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, uint } *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { -
Re: refactor mbuf parsing on driver level
On Thu, Jan 19, 2023 at 12:02:29PM +0300, Vitaliy Makkoveev wrote: > On Thu, Jan 19, 2023 at 01:55:57AM +0300, Vitaliy Makkoveev wrote: > > > On 19 Jan 2023, at 01:39, Jan Klemkow wrote: > > > > > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote: > > >> On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote: > > >>> we have several drivers which have to parse the content of mbufs. This > > >>> diff suggest a central parsing function for this. Thus, we can reduce > > >>> redundant code. > > >>> > > >>> I just start with ix(4) and ixl(4) because it was easy to test for me. > > >>> But, this could also improve em(4), igc(4), ale(4) and oce(4). > > >>> > > >>> I'm not sure about the name, the api nor the place of this code. So, if > > >>> someone has a better idea: i'm open to anything. > > >> > > >> I like code this deduplication. > > >> > > >> This newly introduced function doesn't touch ifnet but only extracts > > >> protocol headers from mbuf(9). I guess mbuf_extract_headers() or > > >> something like is much better for name with the ern/uipc_mbuf2.c as > > >> place. > > > > > > Good Point. Updates diff below. > > > > > > + > > > +/* Parse different TCP/IP protocol headers for a quick view inside an > > > mbuf. */ > > > +void > > > +m_exract_headers(struct mbuf *mp, struct ether_header **eh, struct ip > > > **ip4, > > > +struct ip6_hdr **ip6, struct tcphdr **tcp, struct udphdr **udp) > > > + > > > > Should be m_extract_headers(). The rest of the diff looks good to me. > > > > Please wait. > > The mandatory nullification of `ip4', `ip6' and other variables passed > to m_exract_headers() is not obvious. It is much better to return > the integer result of extraction like m_tag_copy_chain() does. Yes, the mandatory nullification seems to be more errorprone. In my opinion is the number of results it not that useful. You have to check the retuned pointers anyway. I moved the nullification inside of m_exract_headers(). 
Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ix.c --- dev/pci/if_ix.c 2 Sep 2022 14:08:09 - 1.189 +++ dev/pci/if_ix.c 19 Jan 2023 09:29:10 - @@ -2477,23 +2477,18 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_header *eh; + struct ip *ip; + struct ip6_hdr *ip6; int offload = 0; uint32_t iphlen; uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + m_extract_headers(mp, &eh, &ip, &ip6, NULL, NULL); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + if (ip) { iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; @@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, uint } *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - + } else if (ip6) { iphlen = sizeof(*ip6); ipproto = ip6->ip6_nxt; *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; - } #endif - - default: + } else { return offload; } Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.84 diff -u -p -r1.84 if_ixl.c --- dev/pci/if_ixl.c5 Aug 2022 13:57:16 - 1.84 +++ dev/pci/if_ixl.c19 Jan 2023 09:29:17 - @@ -2784,8 +2784,10 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm static uint64_t ixl_tx_setup_offload(struct
Re: refactor mbuf parsing on driver level
On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote: > On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote: > > we have several drivers which have to parse the content of mbufs. This > > diff suggest a central parsing function for this. Thus, we can reduce > > redundant code. > > > > I just start with ix(4) and ixl(4) because it was easy to test for me. > > But, this could also improve em(4), igc(4), ale(4) and oce(4). > > > > I'm not sure about the name, the api nor the place of this code. So, if > > someone has a better idea: i'm open to anything. > > I like code this deduplication. > > This newly introduced function doesn't touch ifnet but only extracts > protocol headers from mbuf(9). I guess mbuf_extract_headers() or > something like is much better for name with the ern/uipc_mbuf2.c as > place. Good Point. Updates diff below. Thanks, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ix.c --- dev/pci/if_ix.c 2 Sep 2022 14:08:09 - 1.189 +++ dev/pci/if_ix.c 18 Jan 2023 21:06:58 - @@ -2477,23 +2477,18 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_header *eh = NULL; + struct ip *ip = NULL; + struct ip6_hdr *ip6 = NULL; int offload = 0; uint32_t iphlen; uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + m_exract_headers(mp, &eh, &ip, &ip6, NULL, NULL); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + if (ip) { iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; @@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, 
uint } *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - + } else if (ip6) { iphlen = sizeof(*ip6); ipproto = ip6->ip6_nxt; *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; - } #endif - - default: + } else { return offload; } Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.84 diff -u -p -r1.84 if_ixl.c --- dev/pci/if_ixl.c5 Aug 2022 13:57:16 - 1.84 +++ dev/pci/if_ixl.c18 Jan 2023 20:47:01 - @@ -2784,12 +2784,15 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm static uint64_t ixl_tx_setup_offload(struct mbuf *m0) { - struct mbuf *m; - int hoff; + struct ether_header *eh = NULL; + struct ip *ip = NULL; + struct ip6_hdr *ip6 = NULL; + struct tcphdr *th = NULL; uint64_t hlen; uint8_t ipproto; uint64_t offload = 0; + if (ISSET(m0->m_flags, M_VLANTAG)) { uint64_t vtag = m0->m_pkthdr.ether_vtag; offload |= IXL_TX_DESC_CMD_IL2TAG1; @@ -2800,39 +2803,23 @@ ixl_tx_setup_offload(struct mbuf *m0) M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) return (offload); - switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(m0, ETHER_HDR_LEN, &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + m_exract_headers(m0, &eh, &ip, &ip6, &th, NULL); + if (ip) { offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ? IXL_TX_DESC_CMD_IIPT_IPV4_CSUM : IXL_TX_DESC_CMD_IIPT_IPV4; hlen = ip->ip_hl << 2; ipproto = ip->ip_p; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m
Re: mem.4: be more accurate about securelevel
On Tue, Jan 17, 2023 at 11:02:07PM +0100, Theo Buehler wrote: > > at least this tool works for me: > > Surely you have kern.allowkmem=1 set. This diff should phrase it correctly. ok? Thanks, Jan Index: man4.alpha/mem.4 === RCS file: /cvs/src/share/man/man4/man4.alpha/mem.4,v retrieving revision 1.6 diff -u -p -r1.6 mem.4 --- man4.alpha/mem.412 Jan 2018 04:36:44 - 1.6 +++ man4.alpha/mem.418 Jan 2023 19:25:27 - @@ -63,11 +63,12 @@ kernel virtual memory begins at .Pp Even with sufficient file system permissions, these devices can only be opened when the -.Xr securelevel 7 -is insecure or when the .Va kern.allowkmem .Xr sysctl 2 variable is set. +Also the +.Xr securelevel 7 +insecure is needed, to open the device writable. .Sh FILES .Bl -tag -width /dev/kmem -compact .It /dev/mem Index: man4.amd64/mem.4 === RCS file: /cvs/src/share/man/man4/man4.amd64/mem.4,v retrieving revision 1.6 diff -u -p -r1.6 mem.4 --- man4.amd64/mem.412 Jan 2018 04:36:44 - 1.6 +++ man4.amd64/mem.418 Jan 2023 19:26:59 - @@ -64,11 +64,12 @@ The kernel virtual memory begins at addr .Pp Even with sufficient file system permissions, these devices can only be opened when the -.Xr securelevel 7 -is insecure or when the .Va kern.allowkmem .Xr sysctl 2 variable is set. +Also the +.Xr securelevel 7 +insecure is needed, to open the device writable. .Sh FILES .Bl -tag -width Pa -compact .It Pa /dev/mem Index: man4.hppa/mem.4 === RCS file: /cvs/src/share/man/man4/man4.hppa/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.hppa/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.hppa/mem.4 18 Jan 2023 19:29:07 - @@ -52,11 +52,12 @@ address 0; kernel virtual memory begins .Pp Even with sufficient file system permissions, these devices can only be opened when the -.Xr securelevel 7 -is insecure or when the .Va kern.allowkmem .Xr sysctl 2 variable is set. +Also the +.Xr securelevel 7 +insecure is needed, to open the device writable. 
.Sh FILES .Bl -tag -width /dev/kmem -compact .It Pa /dev/mem Index: man4.i386/mem.4 === RCS file: /cvs/src/share/man/man4/man4.i386/mem.4,v retrieving revision 1.12 diff -u -p -r1.12 mem.4 --- man4.i386/mem.4 12 Jan 2018 04:36:44 - 1.12 +++ man4.i386/mem.4 18 Jan 2023 19:30:18 - @@ -64,11 +64,12 @@ long, and ends at virtual address .Pp Even with sufficient file system permissions, these devices can only be opened when the -.Xr securelevel 7 -is insecure or when the .Va kern.allowkmem .Xr sysctl 2 variable is set. +Also the +.Xr securelevel 7 +insecure is needed, to open the device writable. .Sh FILES .Bl -tag -width Pa -compact .It Pa /dev/mem Index: man4.landisk/mem.4 === RCS file: /cvs/src/share/man/man4/man4.landisk/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.landisk/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.landisk/mem.4 18 Jan 2023 19:31:28 - @@ -59,11 +59,12 @@ The kernel virtual memory begins at addr .Pp Even with sufficient file system permissions, these devices can only be opened when the -.Xr securelevel 7 -is insecure or when the .Va kern.allowkmem .Xr sysctl 2 variable is set. +Also the +.Xr securelevel 7 +insecure is needed, to open the device writable. .Sh FILES .Bl -tag -width Pa -compact .It Pa /dev/mem Index: man4.loongson/mem.4 === RCS file: /cvs/src/share/man/man4/man4.loongson/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.loongson/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.loongson/mem.4 18 Jan 2023 19:32:44 - @@ -89,11 +89,12 @@ The kernel virtual memory begins at addr .Pp Even with sufficient file system permissions, these devices can only be opened when the -.Xr securelevel 7 -is insecure or when the .Va kern.allowkmem .Xr sysctl 2 variable is set. +Also the +.Xr securelevel 7 +insecure is needed, to open the device writable. 
.Sh FILES .Bl -tag -width Pa -compact .It Pa /dev/mem Index: man4.luna88k/mem.4 === RCS file: /cvs/src/share/man/man4/man4.luna88k/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.luna88k/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.luna88k/mem.4 18 Jan 2023 19:33:50 - @@ -63,11 +63,12 @@ kernel virtual memory begins at .Pp Even with sufficient file system permissions, these devices can only be opened when the -.Xr securelevel 7 -is insecure or when the .Va kern.allowkmem .Xr sysctl 2 variable is set. +Also the +.Xr securelevel 7 +insecure is needed, to open the device writable. .Sh FILES .Bl -tag
Re: mem.4: be more accurate about securelevel
On Tue, Jan 17, 2023 at 11:02:07PM +0100, Theo Buehler wrote: > > at least this tool works for me: > > Surely you have kern.allowkmem=1 set. Yes, I do.
refactor mbuf parsing on driver level
Hi, we have several drivers which have to parse the content of mbufs. This diff suggests a central parsing function for this. Thus, we can reduce redundant code. I just started with ix(4) and ixl(4) because it was easy to test for me. But, this could also improve em(4), igc(4), ale(4) and oce(4). I'm not sure about the name, the API, or the place of this code. So, if someone has a better idea: I'm open to anything. bye, Jan Index: dev/pci/if_ix.c === RCS file: /cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ix.c --- dev/pci/if_ix.c 2 Sep 2022 14:08:09 - 1.189 +++ dev/pci/if_ix.c 17 Jan 2023 16:31:19 - @@ -2477,23 +2477,18 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; + struct ether_header *eh = NULL; + struct ip *ip = NULL; + struct ip6_hdr *ip6 = NULL; int offload = 0; uint32_t iphlen; uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + if_parse(mp, &eh, &ip, &ip6, NULL, NULL); - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + if (ip) { iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; @@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, uint } *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - + } else if (ip6) { iphlen = sizeof(*ip6); ipproto = ip6->ip6_nxt; *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; - } #endif - - default: + } else { return offload; } 
Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.84 diff -u -p -r1.84 if_ixl.c --- dev/pci/if_ixl.c5 Aug 2022 13:57:16 - 1.84 +++ dev/pci/if_ixl.c16 Jan 2023 23:58:05 - @@ -2784,12 +2784,15 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm static uint64_t ixl_tx_setup_offload(struct mbuf *m0) { - struct mbuf *m; - int hoff; + struct ether_header *eh = NULL; + struct ip *ip = NULL; + struct ip6_hdr *ip6 = NULL; + struct tcphdr *th = NULL; uint64_t hlen; uint8_t ipproto; uint64_t offload = 0; + if (ISSET(m0->m_flags, M_VLANTAG)) { uint64_t vtag = m0->m_pkthdr.ether_vtag; offload |= IXL_TX_DESC_CMD_IL2TAG1; @@ -2800,39 +2803,23 @@ ixl_tx_setup_offload(struct mbuf *m0) M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) return (offload); - switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(m0, ETHER_HDR_LEN, &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + if_parse(m0, &eh, &ip, &ip6, &th, NULL); + if (ip) { offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ? IXL_TX_DESC_CMD_IIPT_IPV4_CSUM : IXL_TX_DESC_CMD_IIPT_IPV4; hlen = ip->ip_hl << 2; ipproto = ip->ip_p; - break; - } - #ifdef INET6 - case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(m0, ETHER_HDR_LEN, &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - + } else if (ip6) { offload |= IXL_TX_DESC_CMD_IIPT_IPV6; hlen = sizeof(*ip6); ipproto = ip6->ip6_nxt; - break; - } #endif - default: + } else { panic("CSUM_OUT set for non-IP packet"); /* NOTREACHED */ } @@ -2842,15 +2829,12 @@ ixl_tx_setup_offload(struct mbuf *m0) switch (ipproto) { case IPPROTO_TCP: { - struct tcphdr *th; - if (!ISSET(m0->m
Re: mem.4: be more accurate about securelevel
On Tue, Jan 17, 2023 at 04:23:48PM -0500, Bryan Steele wrote: > On Tue, Jan 17, 2023 at 09:37:24PM +0100, Jan Klemkow wrote: > > Hi, > > > > This diff adjust the manpage of mem(4) to be more accurate. You can > > open(2) mem(4) in securelevel 1 in readonly mode, but not writable. > > > > kern/spec_vnops.c: > > > > if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { > > ... > > /* > > * When running in secure mode, do not allow opens > > * for writing of /dev/mem, /dev/kmem, or character > > * devices whose corresponding block devices are > > * currently mounted. > > */ > > if (securelevel >= 1) { > > ... > > if (iskmemdev(dev)) > > return (EPERM); > > } > > } > > > > OK? > > > > bye, > > Jan > > Are you sure about that? Have you tested it? > > https://github.com/openbsd/src/commit/19aedf236181e81baf170421900911c82671fae4 at least this tool works for me: #include #include #include #include #include #include #include int main(void) { kvm_t *kd; int mem; struct nlist nl[] = { {"_ix_debug_ioctl"}, {NULL} }; char errbuf[_POSIX2_LINE_MAX]; if ((kd = kvm_open(_PATH_KSYMS, NULL, NULL, O_RDWR, errbuf)) == NULL) errx(EXIT_FAILURE, "%s", errbuf); if (kvm_nlist(kd, nl) == -1) errx(EXIT_SUCCESS, "%s", kvm_geterr(kd)); if (kvm_read(kd, nl[0].n_value, &mem, sizeof mem) != sizeof(mem)) errx(EXIT_SUCCESS, "%s", kvm_geterr(kd)); printf("mem: %d\n", mem); mem = 1; if (kvm_write(kd, nl[0].n_value, &mem, sizeof mem) != sizeof(mem)) errx(EXIT_SUCCESS, "%s", kvm_geterr(kd)); if (kvm_close(kd) == -1) err(EXIT_FAILURE, "kvm_close"); return EXIT_SUCCESS; }
mem.4: be more accurate about securelevel
Hi, This diff adjust the manpage of mem(4) to be more accurate. You can open(2) mem(4) in securelevel 1 in readonly mode, but not writable. kern/spec_vnops.c: if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { ... /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { ... if (iskmemdev(dev)) return (EPERM); } } OK? bye, Jan Index: man4.alpha/mem.4 === RCS file: /cvs/src/share/man/man4/man4.alpha/mem.4,v retrieving revision 1.6 diff -u -p -r1.6 mem.4 --- man4.alpha/mem.412 Jan 2018 04:36:44 - 1.6 +++ man4.alpha/mem.417 Jan 2023 18:51:10 - @@ -62,7 +62,7 @@ kernel virtual memory begins at .Li 0xfc23 . .Pp Even with sufficient file system permissions, -these devices can only be opened when the +these devices can only be opened writable when the .Xr securelevel 7 is insecure or when the .Va kern.allowkmem Index: man4.amd64/mem.4 === RCS file: /cvs/src/share/man/man4/man4.amd64/mem.4,v retrieving revision 1.6 diff -u -p -r1.6 mem.4 --- man4.amd64/mem.412 Jan 2018 04:36:44 - 1.6 +++ man4.amd64/mem.417 Jan 2023 18:48:23 - @@ -63,7 +63,7 @@ The kernel virtual memory begins at addr .Li 0x8000 . .Pp Even with sufficient file system permissions, -these devices can only be opened when the +these devices can only be opened writable when the .Xr securelevel 7 is insecure or when the .Va kern.allowkmem Index: man4.hppa/mem.4 === RCS file: /cvs/src/share/man/man4/man4.hppa/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.hppa/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.hppa/mem.4 17 Jan 2023 18:52:28 - @@ -51,7 +51,7 @@ On hppa, the physical memory range is al address 0; kernel virtual memory begins at address 0 as well. 
.Pp Even with sufficient file system permissions, -these devices can only be opened when the +these devices can only be opened writable when the .Xr securelevel 7 is insecure or when the .Va kern.allowkmem Index: man4.i386/mem.4 === RCS file: /cvs/src/share/man/man4/man4.i386/mem.4,v retrieving revision 1.12 diff -u -p -r1.12 mem.4 --- man4.i386/mem.4 12 Jan 2018 04:36:44 - 1.12 +++ man4.i386/mem.4 17 Jan 2023 18:53:00 - @@ -63,7 +63,7 @@ long, and ends at virtual address .Li 0xfe00 . .Pp Even with sufficient file system permissions, -these devices can only be opened when the +these devices can only be opened writable when the .Xr securelevel 7 is insecure or when the .Va kern.allowkmem Index: man4.landisk/mem.4 === RCS file: /cvs/src/share/man/man4/man4.landisk/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.landisk/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.landisk/mem.4 17 Jan 2023 18:53:54 - @@ -58,7 +58,7 @@ The kernel virtual memory begins at addr .Li 0xc000 . .Pp Even with sufficient file system permissions, -these devices can only be opened when the +these devices can only be opened writable when the .Xr securelevel 7 is insecure or when the .Va kern.allowkmem Index: man4.loongson/mem.4 === RCS file: /cvs/src/share/man/man4/man4.loongson/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.loongson/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.loongson/mem.4 17 Jan 2023 18:54:33 - @@ -88,7 +88,7 @@ The kernel virtual memory begins at addr .Ad 0xc000 . .Pp Even with sufficient file system permissions, -these devices can only be opened when the +these devices can only be opened writable when the .Xr securelevel 7 is insecure or when the .Va kern.allowkmem Index: man4.luna88k/mem.4 === RCS file: /cvs/src/share/man/man4/man4.luna88k/mem.4,v retrieving revision 1.4 diff -u -p -r1.4 mem.4 --- man4.luna88k/mem.4 12 Jan 2018 04:36:44 - 1.4 +++ man4.luna88k/mem.4 17 Jan 2023 18:54:47 - @@ -62,7 +62,7 @@ kernel virtual memory begins at .Ad 0x . 
.Pp Even with sufficient file system permissions, -these devices can only be opened when the +these devices can only be opened writable when the .Xr securelevel 7 is insecure or when the .Va kern.allowkmem Index: man4.macppc/mem.4 ===
Fix kernel build without IPSEC option
Hi, if you build the kernel without IPSEC, it runs into several compiler and linker errors. This diff adds some missing #ifdefs to fix this. ok? bye, jan Index: net/if_pfsync.c === RCS file: /mount/openbsd/cvs/src/sys/net/if_pfsync.c,v retrieving revision 1.305 diff -u -p -r1.305 if_pfsync.c --- net/if_pfsync.c 21 Apr 2022 15:22:49 - 1.305 +++ net/if_pfsync.c 2 Nov 2022 10:20:38 - @@ -1576,7 +1576,9 @@ pfsync_grab_snapshot(struct pfsync_snaps int q; struct pf_state *st; struct pfsync_upd_req_item *ur; +#if defined(IPSEC) struct tdb *tdb; +#endif sn->sn_sc = sc; @@ -1602,6 +1604,7 @@ pfsync_grab_snapshot(struct pfsync_snaps } TAILQ_INIT(&sn->sn_tdb_q); +#if defined(IPSEC) while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry); TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap); @@ -1611,6 +1614,7 @@ pfsync_grab_snapshot(struct pfsync_snaps SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); mtx_leave(&tdb->tdb_mtx); } +#endif sn->sn_len = sc->sc_len; sc->sc_len = PFSYNC_MINPKT; @@ -1630,7 +1634,9 @@ pfsync_drop_snapshot(struct pfsync_snaps { struct pf_state *st; struct pfsync_upd_req_item *ur; +#if defined(IPSEC) struct tdb *t; +#endif int q; for (q = 0; q < PFSYNC_S_COUNT; q++) { @@ -1652,6 +1658,7 @@ pfsync_drop_snapshot(struct pfsync_snaps pool_put(&sn->sn_sc->sc_pool, ur); } +#if defined(IPSEC) while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) { TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap); mtx_enter(&t->tdb_mtx); @@ -1660,6 +1667,7 @@ pfsync_drop_snapshot(struct pfsync_snaps CLR(t->tdb_flags, TDBF_PFSYNC); mtx_leave(&t->tdb_mtx); } +#endif } int @@ -1748,7 +1756,6 @@ pfsync_sendout(void) struct pfsync_subheader *subh; struct pf_state *st; struct pfsync_upd_req_item *ur; - struct tdb *t; int offset; int q, count = 0; @@ -1842,7 +1849,10 @@ pfsync_sendout(void) sn.sn_plus = NULL; /* XXX memory leak ? 
*/ } +#if defined(IPSEC) if (!TAILQ_EMPTY(&sn.sn_tdb_q)) { + struct tdb *t; + subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); @@ -1865,6 +1875,7 @@ pfsync_sendout(void) subh->len = sizeof(struct pfsync_tdb) >> 2; subh->count = htons(count); } +#endif /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { @@ -2486,6 +2497,7 @@ pfsync_q_del(struct pf_state *st) pf_state_unref(st); } +#if defined(IPSEC) void pfsync_update_tdb(struct tdb *t, int output) { @@ -2540,7 +2552,9 @@ pfsync_update_tdb(struct tdb *t, int out CLR(t->tdb_flags, TDBF_PFSYNC_RPL); mtx_leave(&t->tdb_mtx); } +#endif +#if defined(IPSEC) void pfsync_delete_tdb(struct tdb *t) { @@ -2576,6 +2590,7 @@ pfsync_delete_tdb(struct tdb *t) tdb_unref(t); } +#endif void pfsync_out_tdb(struct tdb *t, void *buf) Index: netinet/ip_ipsp.c === RCS file: /mount/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v retrieving revision 1.273 diff -u -p -r1.273 ip_ipsp.c --- netinet/ip_ipsp.c 6 Aug 2022 15:57:59 - 1.273 +++ netinet/ip_ipsp.c 2 Nov 2022 12:09:22 - @@ -1081,7 +1081,7 @@ tdb_free(struct tdb *tdbp) tdbp->tdb_xform = NULL; } -#if NPFSYNC > 0 +#if NPFSYNC > 0 && defined(IPSEC) /* Cleanup pfsync references */ pfsync_delete_tdb(tdbp); #endif
if_parse_packet(): refactor packet parsing on driver level
Hi, We have a lot of redundant code in the network device driver layer that parses the content of mbufs for ethernet, ip and tcp headers. This diff introduces a new function if_parse_packet() to centralize this feature. It just refactors the ix(4) and ixl(4) code because I could test these cards and don't want to blow up this diff. But igc(4), ale(4) and oce(4) could also be improved with this. Besides the refactoring, we'll need this kind of code in ix(4) and other drivers for better checksum and TSO support. I'm not sure about the correct naming or place for this helper function. Thus, nitpicking and bikeshedding are welcome. :) bye, Jan Index: dev/pci/if_ix.c === RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.189 diff -u -p -r1.189 if_ix.c --- dev/pci/if_ix.c 2 Sep 2022 14:08:09 - 1.189 +++ dev/pci/if_ix.c 24 Oct 2022 13:51:22 - @@ -2477,25 +2477,18 @@ static inline int ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) { - struct ether_header *eh = mtod(mp, struct ether_header *); - struct mbuf *m; - int hoff; int offload = 0; - uint32_t iphlen; uint8_t ipproto; - *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + struct if_hdr hdr; - switch (ntohs(eh->ether_type)) { - case ETHERTYPE_IP: { - struct ip *ip; + if_parse_packet(mp, &hdr); - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); + *vlan_macip_lens |= (hdr.l2len << IXGBE_ADVTXD_MACLEN_SHIFT); - iphlen = ip->ip_hl << 2; - ipproto = ip->ip_p; + switch (ntohs(hdr.eth->ether_type)) { + case ETHERTYPE_IP: { + ipproto = hdr.ip4->ip_p; if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) { *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; @@ -2508,15 +2501,7 @@ ixgbe_csum_offload(struct mbuf *mp, uint #ifdef INET6 case ETHERTYPE_IPV6: { - struct ip6_hdr *ip6; - - m = m_getptr(mp, sizeof(*eh), &hoff); - KASSERT(m != NULL && m->m_len - 
hoff >= sizeof(*ip6)); - ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff); - - iphlen = sizeof(*ip6); - ipproto = ip6->ip6_nxt; - + ipproto = hdr.ip6->ip6_nxt; *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; } @@ -2526,7 +2511,7 @@ ixgbe_csum_offload(struct mbuf *mp, uint return offload; } - *vlan_macip_lens |= iphlen; + *vlan_macip_lens |= hdr.l3len; switch (ipproto) { case IPPROTO_TCP: Index: dev/pci/if_ixgb.h === RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixgb.h,v retrieving revision 1.19 diff -u -p -r1.19 if_ixgb.h --- dev/pci/if_ixgb.h 24 Nov 2015 17:11:39 - 1.19 +++ dev/pci/if_ixgb.h 24 Oct 2022 13:27:43 - @@ -54,6 +54,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include Index: dev/pci/if_ixl.c === RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.84 diff -u -p -r1.84 if_ixl.c --- dev/pci/if_ixl.c5 Aug 2022 13:57:16 - 1.84 +++ dev/pci/if_ixl.c24 Oct 2022 16:34:29 - @@ -2784,11 +2784,12 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm static uint64_t ixl_tx_setup_offload(struct mbuf *m0) { - struct mbuf *m; - int hoff; - uint64_t hlen; uint8_t ipproto; uint64_t offload = 0; + struct if_hdr hdr; + + memset(&hdr, 0, sizeof(hdr)); + if_parse_packet(m0, &hdr); if (ISSET(m0->m_flags, M_VLANTAG)) { uint64_t vtag = m0->m_pkthdr.ether_vtag; @@ -2800,35 +2801,20 @@ ixl_tx_setup_offload(struct mbuf *m0) M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) return (offload); - switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) { + switch (ntohs(hdr.eth->ether_type)) { case ETHERTYPE_IP: { - struct ip *ip; - - m = m_getptr(m0, ETHER_HDR_LEN, &hoff); - KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip)); - ip = (struct ip *)(mtod(m, caddr_t) + hoff); - offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ? IXL_TX_DESC_CMD_IIPT_IPV4_CSUM : IXL_TX_DESC_CMD_IIPT_IPV4; - hlen = ip->ip_hl << 2; - ipproto = ip->ip_p; + ipproto = hdr.ip4->ip_p; break; } #ifdef INET6
Re: ix(4): enable TCPv6/UDPv6 cksum offloading
On Wed, Jan 12, 2022 at 05:54:07PM +0100, Mark Kettenis wrote: > > Date: Wed, 12 Jan 2022 17:45:57 +0100 > > From: Jan Klemkow > > > > On Wed, Jan 12, 2022 at 05:36:01PM +0100, Mark Kettenis wrote: > > > > Date: Wed, 12 Jan 2022 17:02:03 +0100 > > > > From: Jan Klemkow > > > > > > > > Hi, > > > > > > > > This diff enables TCP and UDP checksum offloading in ix(4) for IPv6. > > > > > > > > IPv6 extension headers aren't a problem in this case. > > > > in6_proto_cksum_out() in netinet6/ip6_output.c disables checksum > > > > offloading if ip6_nxt is not TCP or UDP. Thus, we can just use this > > > > field. > > > > > > > > Tested with: > > > > ix0 at pci5 dev 0 function 0 "Intel 82599" rev 0x01, msix, 8 queues, > > > > address 00:1b:21:94:4c:48 > > > > > > > > OK? > > > > > > Isn't this the same disaster as the ixl(4) diff you sent earlier? We > > > have sparc64 machines with onboard ix(4)... > > > > Yes, but we don't parse the TCP header here. As bluhm@ figured out: > > The access to ip_hl does not generate an alignment problem on sparc64. > > Because, the bits of ip_hl are on the other of the byte, as bits of > > th_off. > > > > This diff just touches the IPv6 case, where we don't have this kind of > > problem, anyway. > > But you're still using m_getptr(), casting the result to a struct and > then look at a member of the struct, which may access data beyond the > end of the mbuf. We use the same pattern in re(4), vio(4) and the IPv4 case in ix(4). And we check for this case with the KASSERT(), except in re(4). To me, it looks as if this assumption is safe. 
> > > > Index: dev/pci/if_ix.c > > > > === > > > > RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ix.c,v > > > > retrieving revision 1.180 > > > > diff -u -p -r1.180 if_ix.c > > > > --- dev/pci/if_ix.c 27 Jul 2021 01:44:55 - 1.180 > > > > +++ dev/pci/if_ix.c 12 Jan 2022 14:53:14 - > > > > @@ -1879,7 +1879,8 @@ ixgbe_setup_interface(struct ix_softc *s > > > > ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; > > > > #endif > > > > > > > > - ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4; > > > > + ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 > > > > + | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > > > > > > > > /* > > > > * Specify the media types supported by this sc and register > > > > @@ -2438,9 +2439,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, > > > > struct ether_header *eh; > > > > #endif > > > > struct ip *ip; > > > > -#ifdef notyet > > > > struct ip6_hdr *ip6; > > > > -#endif > > > > struct mbuf *m; > > > > int ipoff; > > > > uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0; > > > > @@ -2521,19 +2520,16 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, > > > > ipproto = ip->ip_p; > > > > type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; > > > > break; > > > > -#ifdef notyet > > > > case ETHERTYPE_IPV6: > > > > if (mp->m_pkthdr.len < ehdrlen + sizeof(*ip6)) > > > > return (-1); > > > > m = m_getptr(mp, ehdrlen, &ipoff); > > > > KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip6)); > > > > - ip6 = (struct ip6 *)(m->m_data + ipoff); > > > > + ip6 = (struct ip6_hdr *)(m->m_data + ipoff); > > > > ip_hlen = sizeof(*ip6); > > > > - /* XXX-BZ this will go badly in case of ext hdrs. 
*/ > > > > ipproto = ip6->ip6_nxt; > > > > type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; > > > > break; > > > > -#endif > > > > default: > > > > offload = FALSE; > > > > break; > > > > Index: dev/pci/ixgbe.h > > > > === > > > > RCS file: /mount/openbsd/cvs/src/sys/dev/pci/ixgbe.h,v > > > > retrieving revision 1.32 > > > > diff -u -p -r1.32 ixgbe.h > > > > --- dev/pci/ixgbe.h 18 Jul 2020 07:18:22 - 1.32 > > > > +++ dev/pci/ixgbe.h 12 Jan 2022 14:57:13 - > > > > @@ -65,6 +65,7 @@ > > > > #include > > > > #include > > > > #include > > > > +#include > > > > > > > > #if NBPFILTER > 0 > > > > #include > > > > > > > > > > > > > >
diff: improve legibility of structs in several manpages
Hello, This diff harmonises the indentation of struct members and comments in several manpages. Also fixes line wraps of comments on 80 column terminals. General uses tabs for general indentation and 4 spaces on tight spots. Also uses extra space to align pointers and non-pointers as we do this on certain places in our source. OK? bye, Jan Index: sys/kbind.2 === RCS file: /cvs/src/lib/libc/sys/kbind.2,v retrieving revision 1.3 diff -u -p -r1.3 kbind.2 --- sys/kbind.2 1 Sep 2016 10:08:03 - 1.3 +++ sys/kbind.2 26 Oct 2021 14:15:37 - @@ -25,7 +25,7 @@ .Bd -literal struct __kbind { void*kb_addr; -size_t kb_size; +size_t kb_size; }; #define KBIND_BLOCK_MAX 2 /* powerpc and sparc64 need 2 blocks */ #define KBIND_DATA_MAX 24 /* sparc64 needs 6, four-byte words */ Index: sys/kqueue.2 === RCS file: /cvs/src/lib/libc/sys/kqueue.2,v retrieving revision 1.44 diff -u -p -r1.44 kqueue.2 --- sys/kqueue.222 Apr 2021 15:30:12 - 1.44 +++ sys/kqueue.226 Oct 2021 14:16:16 - @@ -141,11 +141,11 @@ The structure is defined as: .Bd -literal struct kevent { - uintptr_t ident; /* identifier for this event */ - short filter; /* filter for event */ - u_shortflags; /* action flags for kqueue */ - u_int fflags; /* filter flag value */ - int64_tdata;/* filter data value */ + uintptr_t ident; /* identifier for this event */ + short filter; /* filter for event */ + u_short flags; /* action flags for kqueue */ + u_int fflags; /* filter flag value */ + int64_t data; /* filter data value */ void *udata; /* opaque user data identifier */ }; .Ed Index: sys/ktrace.2 === RCS file: /cvs/src/lib/libc/sys/ktrace.2,v retrieving revision 1.38 diff -u -p -r1.38 ktrace.2 --- sys/ktrace.21 Sep 2021 15:51:45 - 1.38 +++ sys/ktrace.226 Oct 2021 14:17:20 - @@ -136,12 +136,12 @@ followed by a trace point specific struc The generic header is: .Bd -literal struct ktr_header { - uintktr_type; /* trace record type */ - pid_t ktr_pid;/* process id */ - pid_t ktr_tid;/* thread id */ - struct timespec ktr_time; /* timestamp 
*/ - charktr_comm[MAXCOMLEN+1]; /* command name */ - size_t ktr_len;/* length of buf */ + uintktr_type; /* trace record type */ + pid_t ktr_pid;/* process id */ + pid_t ktr_tid;/* thread id */ + struct timespec ktr_time; /* timestamp */ + charktr_comm[MAXCOMLEN+1]; /* command name */ + size_t ktr_len;/* length of buf */ }; .Ed .Pp Index: sys/nfssvc.2 === RCS file: /cvs/src/lib/libc/sys/nfssvc.2,v retrieving revision 1.23 diff -u -p -r1.23 nfssvc.2 --- sys/nfssvc.231 May 2015 23:54:25 - 1.23 +++ sys/nfssvc.226 Oct 2021 15:54:29 - @@ -63,16 +63,16 @@ is called with the flag and a pointer to a structure: .Bd -literal struct nfsd_srvargs { -struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ -uid_t nsd_uid;/* Effective uid mapped to cred */ -u_int32_t nsd_haddr; /* IP address of client */ -struct xucred nsd_cr; /* Cred. uid maps to */ -int nsd_authlen;/* Length of auth string (ret) */ -u_char *nsd_authstr; /* Auth string (ret) */ -int nsd_verflen;/* and the verifier */ -u_char *nsd_verfstr; -struct timeval nsd_timestamp; /* timestamp from verifier */ -u_int32_t nsd_ttl;/* credential ttl (sec) */ +struct nfsd*nsd_nfsd; /* Pointer to in kernel nfsd struct */ +uid_t nsd_uid; /* Effective uid mapped to cred */ +u_int32_t nsd_haddr; /* IP address of client */ +struct xucred nsd_cr; /* Cred. uid maps to */ +intnsd_authlen; /* Length of auth string (ret) */ +u_char*nsd_authstr; /* Auth string (ret) */ +intnsd_verflen; /* and the verifier */ +u_char*nsd_verfstr; +struct timeval nsd_timestamp; /* timestamp from verifier */ +u_int32_t nsd_ttl; /* credential ttl (sec) */ }; .Ed .Pp @@ -87,9 +87,9 @@ with the flag and a pointer to a structure: .Bd -literal struct nfsd_args { -int sock; /* Socket to serve */ -caddr_t name;
Re: ixl(4): add rx/tx checksum offloading
On Tue, Oct 26, 2021 at 05:17:55PM +1000, Jonathan Matthew wrote: > First of all, thanks for looking at this, I forgot we hadn't done offloads > for ixl(4) yet. You're welcome. > In the case of ixl(4), the driver has to tell the nic the length of each of > the > packet headers, so it should also be tested with vlan interfaces. > > I think ixl_tx_setup_offload() needs to account for outgoing vlan-tagged > packets. Yes, it should. I just want to keep this diff small for now. I plan to implement handling of vlan tags in a later diff. If the stack tries to send out a vlan-tagged ethernet frame, the switch statement at the beginning of the function just stops processing the offload and returns. So, with vlan tags we just don't offload checksumming at the moment. I also tested this scenario. > It currently assumes the ethernet header is ETHER_HDR_LEN bytes long, which > isn't > always true. See ixgbe_tx_ctx_setup() (sys/dev/pci/if_ix.c) for an example of > a driver that takes this into account. I already looked at this code and will adapt vlan tagging later, if this is OK with you? 
Thanks, Jan > > Index: dev/pci/if_ixl.c > > === > > RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v > > retrieving revision 1.75 > > diff -u -p -r1.75 if_ixl.c > > --- dev/pci/if_ixl.c23 Jul 2021 00:29:14 - 1.75 > > +++ dev/pci/if_ixl.c25 Oct 2021 15:11:46 - > > @@ -82,6 +82,10 @@ > > #endif > > > > #include > > +#include > > +#include > > +#include > > +#include > > #include > > > > #include > > @@ -1388,6 +1392,7 @@ static intixl_rxeof(struct ixl_softc *, > > static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *); > > static voidixl_rxrefill(void *); > > static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *); > > +static voidixl_rx_checksum(struct mbuf *, uint64_t); > > > > #if NKSTAT > 0 > > static voidixl_kstat_attach(struct ixl_softc *); > > @@ -1942,9 +1947,9 @@ ixl_attach(struct device *parent, struct > > ifp->if_capabilities = IFCAP_VLAN_MTU; > > #if 0 > > ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; > > - ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | > > - IFCAP_CSUM_UDPv4; > > #endif > > + ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | > > + IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > > > > ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status); > > > > @@ -2772,6 +2777,69 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm > > } > > > > static void > > +ixl_tx_setup_offload(struct mbuf *mp, uint64_t *cmd) > > +{ > > + uint64_t ip_hdr_len; > > + int ipoff = ETHER_HDR_LEN; > > + uint8_t ipproto; > > + struct ip *ip; > > +#ifdef INET6 > > + struct ip6_hdr *ip6; > > +#endif > > + struct tcphdr *th; > > + struct mbuf *m; > > + > > + switch (ntohs(mtod(mp, struct ether_header *)->ether_type)) { > > + case ETHERTYPE_IP: > > + if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip)) > > + return; > > + m = m_getptr(mp, ETHER_HDR_LEN, &ipoff); > > + KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip)); > > + ip = (struct ip *)(m->m_data + ipoff); > > + > > + if (mp->m_pkthdr.csum_flags & 
M_IPV4_CSUM_OUT) > > + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4_CSUM; > > + else > > + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4; > > + > > + ip_hdr_len = ip->ip_hl << 2; > > + ipproto = ip->ip_p; > > + break; > > +#ifdef INET6 > > + case ETHERTYPE_IPV6: > > + if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip6)) > > + return; > > + m = m_getptr(mp, ETHER_HDR_LEN, &ipoff); > > + KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip6)); > > + ip6 = (struct ip6_hdr *)(m->m_data + ipoff); > > + > > + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV6; > > + > > + ip_hdr_len = sizeof(*ip6); > > + ipproto = ip6->ip6_nxt; > > + break; > > +#endif > > + default: > > + return; > > + } > > + > > + *cmd |= (ETHER_HDR_LEN >> 1) << IXL_TX_DESC_MACLEN_SHIFT; > > + *cmd |= (ip_hdr_len >> 2) << IXL_TX_DESC_IPLEN_SHIFT; > > + > > + if (ipproto == IPPROTO_TCP && m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { > > + th = (struct tcphdr *)(m->m_data + ipoff + ip_hdr_len); > > + > > + *cmd |= IXL_TX_DESC_CMD_L4T_EOFT_TCP; > > + *cmd |= (uint64_t)th->th_off << IXL_TX_DESC_L4LEN_SHIFT; > > + } > > + > > + if (ipproto == IPPROTO_UDP && m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { > > + *cmd |= IXL_TX_DESC_CMD_L4T_EOFT_UDP; > > + *cmd |= (sizeof(struct udphdr) >> 2) << IXL_TX_DESC_L4LEN_SHIFT; > > + } > > +} > > + > > +static void > > ixl_start(struct ifqueue *ifq) > > { >
Re: ixl(4): add rx/tx checksum offloading
On Fri, Oct 22, 2021 at 03:39:01PM +0200, Hrvoje Popovski wrote: > On 22.10.2021. 13:39, Jan Klemkow wrote: > > Thats because, you only see this flags, if the checksum offloading is > > enabled for "sending". I'm still working/debugging on the sending side. > > Thus, I just send a diff with the receiving part for now. > > > > You can see if its working for your card with the netstat(8) statistics. > > > > # netstat -s | grep software-checksummed > > > > These counters should not raise much on the receive side if you put some > > traffic over the interface. > > Thank you for explanation... > > I'm sending 8 tcp streams with iperf3 from some box to openbsd ixl box > and here are results: > > without diff > smc24# netstat -s | grep software-checksummed > 5039250 input datagrams software-checksummed > 2592718 output datagrams software-checksummed > 2592709 packets software-checksummed > 5039250 packets software-checksummed > 0 input packets software-checksummed > 0 output packets software-checksummed > > cca 6.12 Gbits/sec > > > > with diff > smc24# netstat -s | grep software-checksummed > 0 input datagrams software-checksummed > 2956546 output datagrams software-checksummed > 2956537 packets software-checksummed > 0 packets software-checksummed > 0 input packets software-checksummed > 0 output packets software-checksummed > > cca 6.70 Gbits/sec > > are result like those expected? > > is forwarding testing any good for checksum offload diffs? Hi Hrvoje, Thanks a lot for your big testing efforts! In the case of forwarding, the forwarding box just checks the IPv4 header checksum and ignores the UDP/TCP header. Your setup from one box to another is fine. Here is a new diff, which also includes send checksum offloading. Thus, all software-checksummed numbers should stay low in both directions. Could you test this diff with your ospf{6}d and NFS tests? 
If you see IPv4 fragments in the ospf and NFS traffic within tcpdump(8), your test should find the bugs pointed out by deraadt@ and claudio@. You can provoke large NFS packets with the following options on your NFS mount point. server:/export /mnt nfs ro,intr,-r65536,-w65536 Thanks, Jan Index: dev/pci/if_ixl.c === RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.75 diff -u -p -r1.75 if_ixl.c --- dev/pci/if_ixl.c23 Jul 2021 00:29:14 - 1.75 +++ dev/pci/if_ixl.c25 Oct 2021 15:11:46 - @@ -82,6 +82,10 @@ #endif #include +#include +#include +#include +#include #include #include @@ -1388,6 +1392,7 @@ static intixl_rxeof(struct ixl_softc *, static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *); static voidixl_rxrefill(void *); static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *); +static voidixl_rx_checksum(struct mbuf *, uint64_t); #if NKSTAT > 0 static voidixl_kstat_attach(struct ixl_softc *); @@ -1942,9 +1947,9 @@ ixl_attach(struct device *parent, struct ifp->if_capabilities = IFCAP_VLAN_MTU; #if 0 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; - ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | - IFCAP_CSUM_UDPv4; #endif + ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | + IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status); @@ -2772,6 +2777,69 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm } static void +ixl_tx_setup_offload(struct mbuf *mp, uint64_t *cmd) +{ + uint64_t ip_hdr_len; + int ipoff = ETHER_HDR_LEN; + uint8_t ipproto; + struct ip *ip; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + struct tcphdr *th; + struct mbuf *m; + + switch (ntohs(mtod(mp, struct ether_header *)->ether_type)) { + case ETHERTYPE_IP: + if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip)) + return; + m = m_getptr(mp, ETHER_HDR_LEN, &ipoff); + KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip)); + ip = (struct ip *)(m->m_data + ipoff); + + if 
(mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4_CSUM; + else + *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4; + + ip_hdr_len = ip->ip_hl << 2; + ipproto = ip->ip_p; + break; +#ifdef INET6 + case ETHERTYPE_IPV6: +
ixl(4): add checksum receive offloading
Hi, This diff adds hardware checksum offloading for the receive path of ixl(4) interfaces. Tested on: ixl1 at pci3 dev 0 function 1 "Intel X710 SFP+" rev 0x02: port 1, FW 6.0.48442 API 1.7, msix, 8 queues, address 40:a6:b7:02:38:3d OK? Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.75 diff -u -p -r1.75 if_ixl.c --- dev/pci/if_ixl.c23 Jul 2021 00:29:14 - 1.75 +++ dev/pci/if_ixl.c22 Oct 2021 09:20:59 - @@ -1388,6 +1388,7 @@ static intixl_rxeof(struct ixl_softc *, static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *); static voidixl_rxrefill(void *); static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *); +static voidixl_rx_checksum(struct mbuf *, uint64_t); #if NKSTAT > 0 static voidixl_kstat_attach(struct ixl_softc *); @@ -3190,6 +3191,7 @@ ixl_rxeof(struct ixl_softc *sc, struct i m->m_pkthdr.csum_flags |= M_FLOWID; } + ixl_rx_checksum(m, word); ml_enqueue(&ml, m); } else { ifp->if_ierrors++; /* XXX */ @@ -3320,6 +3322,23 @@ ixl_rxrinfo(struct ixl_softc *sc, struct free(ifr, M_TEMP, ixl_nqueues(sc) * sizeof(*ifr)); return (rv); +} + +static void +ixl_rx_checksum(struct mbuf *m, uint64_t word) +{ + if (!ISSET(word, IXL_RX_DESC_L3L4P)) + return; + + if (ISSET(word, IXL_RX_DESC_IPE)) + return; + + m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK; + + if (ISSET(word, IXL_RX_DESC_L4E)) + return; + + m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK; } static int
Re: ixl(4): add checksum receive offloading
On Fri, Oct 22, 2021 at 12:01:41PM +0200, Hrvoje Popovski wrote: > On 22.10.2021. 11:25, Jan Klemkow wrote: > > this diff add hardware checksum offloading for the receive path of > > ixl(4) interfaces. > > > > Tested on: > > ixl1 at pci3 dev 0 function 1 "Intel X710 SFP+" rev 0x02: port 1, FW > > 6.0.48442 API 1.7, msix, 8 queues, address 40:a6:b7:02:38:3d > > > > OK? > > I've applied this diff and i can't see anything regarding offload with > ifconfig ixl hwfeatures? Hi Hrvoje, Thats because, you only see this flags, if the checksum offloading is enabled for "sending". I'm still working/debugging on the sending side. Thus, I just send a diff with the receiving part for now. You can see if its working for your card with the netstat(8) statistics. # netstat -s | grep software-checksummed These counters should not raise much on the receive side if you put some traffic over the interface. Thanks for testing, Jan > smc24# ifconfig ixl0 hwfeatures > ixl0: flags=8843 mtu 1500 > hwfeatures=10 hardmtu 9712 > lladdr 3c:fd:fe:04:0d:64 > index 7 priority 0 llprio 3 > media: Ethernet autoselect (10GSFP+Cu full-duplex) > status: active > inet 192.168.15.1 netmask 0xff00 broadcast 192.168.15.255 > > smc24# dmesg | grep ixl > ixl0 at pci21 dev 0 function 0 "Intel X710 SFP+" rev 0x01: port 0, FW > 8.2.64244 API 1.13, msix, 8 queues, address 3c:fd:fe:04:0d:64 > ixl1 at pci21 dev 0 function 1 "Intel X710 SFP+" rev 0x01: port 1, FW > 8.2.64244 API 1.13, msix, 8 queues, address 3c:fd:fe:04:0d:66 > > > Index: dev/pci/if_ixl.c > > === > > RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v > > retrieving revision 1.75 > > diff -u -p -r1.75 if_ixl.c > > --- dev/pci/if_ixl.c23 Jul 2021 00:29:14 - 1.75 > > +++ dev/pci/if_ixl.c22 Oct 2021 09:20:59 - > > @@ -1388,6 +1388,7 @@ static intixl_rxeof(struct ixl_softc *, > > static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *); > > static voidixl_rxrefill(void *); > > static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *); > > +static 
voidixl_rx_checksum(struct mbuf *, uint64_t); > > > > #if NKSTAT > 0 > > static voidixl_kstat_attach(struct ixl_softc *); > > @@ -3190,6 +3191,7 @@ ixl_rxeof(struct ixl_softc *sc, struct i > > m->m_pkthdr.csum_flags |= M_FLOWID; > > } > > > > + ixl_rx_checksum(m, word); > > ml_enqueue(&ml, m); > > } else { > > ifp->if_ierrors++; /* XXX */ > > @@ -3320,6 +3322,23 @@ ixl_rxrinfo(struct ixl_softc *sc, struct > > free(ifr, M_TEMP, ixl_nqueues(sc) * sizeof(*ifr)); > > > > return (rv); > > +} > > + > > +static void > > +ixl_rx_checksum(struct mbuf *m, uint64_t word) > > +{ > > + if (!ISSET(word, IXL_RX_DESC_L3L4P)) > > + return; > > + > > + if (ISSET(word, IXL_RX_DESC_IPE)) > > + return; > > + > > + m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK; > > + > > + if (ISSET(word, IXL_RX_DESC_L4E)) > > + return; > > + > > + m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK; > > } > > > > static int
Re: relayd regress tcp performance
On Thu, Apr 15, 2021 at 08:43:02PM +0200, Alexander Bluhm wrote: > I found another regression with Jan's TCP diff that sends less ACK > packets. relayd run-args-http-slow-consumer.pl fails on i386 due > to his commit. This test writes a lot of data from the http server, > but blocks receive for 2 seconds in the client. Relayd between > these machines should handle the delay. The socket buffer size is > very small to trigger the situation reliably. > > The current TCP stack does not recover after the delay. Packets > are sent very slowly and the regress test runs in a timeout. When > I backout the change, the test passes quickly. > > Ususally the test runs on localhost loopback. There the problem > is not triggered. Only my i386 regress setup uses a remote machine. This issue is caused by another bug in our stack. The stack calls tcp_output(), but does not send an ACK with a window update after the consuming process empties the receive buffer in soreceive(). Under normal conditions, the every-other-ACK feature hides this problem. Thus, with my diff, the 200ms ACK timer is the only mechanism that sends out ACKs. But this is too slow to empty the stalled buffer fast enough. The following diff removes the every-2nd-ACK feature again and ensures that we send out an ACK if soreceive() empties the receive buffer. We are so close to 7.0 that I would suggest committing this after the release. Thus, we don't risk another last-minute regression. OK? 
bye, Jan Index: netinet/tcp_input.c === RCS file: /cvs/src/sys/netinet/tcp_input.c,v retrieving revision 1.370 diff -u -p -r1.370 tcp_input.c --- netinet/tcp_input.c 9 Aug 2021 17:03:08 - 1.370 +++ netinet/tcp_input.c 18 Sep 2021 07:53:45 - @@ -176,8 +176,7 @@ do { \ struct ifnet *ifp = NULL; \ if (m && (m->m_flags & M_PKTHDR)) \ ifp = if_get(m->m_pkthdr.ph_ifidx); \ - if (TCP_TIMER_ISARMED(tp, TCPT_DELACK) || \ - (tcp_ack_on_push && (tiflags) & TH_PUSH) || \ + if ((tcp_ack_on_push && (tiflags) & TH_PUSH) || \ (ifp && (ifp->if_flags & IFF_LOOPBACK))) \ tp->t_flags |= TF_ACKNOW; \ else \ Index: netinet/tcp_usrreq.c === RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.181 diff -u -p -r1.181 tcp_usrreq.c --- netinet/tcp_usrreq.c30 Apr 2021 13:52:48 - 1.181 +++ netinet/tcp_usrreq.c18 Sep 2021 07:53:45 - @@ -329,8 +329,15 @@ tcp_usrreq(struct socket *so, int req, s * template for a listening socket and hence the kernel * will panic. */ - if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) { + /* +* If soreceive() empty the receive buffer, we have to +* send a window update. +*/ + if (so->so_rcv.sb_cc == 0) + tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); + } break; /*
enable cy(4) in amd64/GENERIC by default
Hi, The card and cables don't have the signaling lines that getty needs to use it as com(4). But with "local" in ttys(5) it works. I have had this driver in production use for about 5 years now. OK? bye, Jan Index: arch/amd64/conf/GENERIC === RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v retrieving revision 1.499 diff -u -p -r1.499 GENERIC --- arch/amd64/conf/GENERIC 20 Aug 2021 05:23:18 - 1.499 +++ arch/amd64/conf/GENERIC 2 Sep 2021 08:49:22 - @@ -403,7 +403,7 @@ com*at pcmcia? # PCMCIA modems/serial com* at puc? # options CY_HW_RTS -#cy* at pci? # PCI cyclom serial card +cy*at pci? # PCI cyclom serial card #cz* at pci? # Cyclades-Z multi-port serial boards lpt0 at isa? port 0x378 irq 7# standard PC parallel ports
Re: vmx(4): remove useless code
On Fri, Aug 06, 2021 at 12:06:04PM +0200, Patrick Wildt wrote: > On Fri, Aug 06, 2021 at 11:05:53AM +0200, Patrick Wildt wrote: > > Am Thu, Aug 05, 2021 at 02:33:01PM +0200 schrieb Jan Klemkow: > > > Hi, > > > > > > The following diff removes useless code from the driver. As discussed > > > here [1] and committed there [2], the hypervisor doesn't do anything > > > with the data structures. We even just set NULL to the pointer since > > > the initial commit of vmx(4). So, I guess it better to remove all of > > > these. The variables are bzero'd in vmxnet3_dma_allocmem() anyway. > > > > > > OK? > > > > My main concern was if the structs are getting zeroed correctly, but > > they do, so that's fine. > > > > That said, it looks like Linux sets the pointer to ~0ULL, not 0. Should > > we follow Linux' pattern there and do that as well? > > > > Thinking about it a little more, I think we should do that as well. And > maybe explicitly set driver_data_len to 0 even though it's already zero. > Basically for readability. OK? Index: dev/pci/if_vmx.c === RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v retrieving revision 1.66 diff -u -p -r1.66 if_vmx.c --- dev/pci/if_vmx.c23 Jul 2021 00:29:14 - 1.66 +++ dev/pci/if_vmx.c6 Aug 2021 12:28:51 - @@ -157,7 +157,6 @@ struct vmxnet3_softc { #define WRITE_BAR1(sc, reg, val) \ bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val) #define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd) -#define vtophys(va) 0 /* XXX ok? 
*/ int vmxnet3_match(struct device *, void *, void *); void vmxnet3_attach(struct device *, struct device *, void *); @@ -468,8 +467,8 @@ vmxnet3_dma_init(struct vmxnet3_softc *s ds->vmxnet3_revision = 1; ds->upt_version = 1; ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN; - ds->driver_data = vtophys(sc); - ds->driver_data_len = sizeof(struct vmxnet3_softc); + ds->driver_data = ~0ULL; + ds->driver_data_len = 0; ds->queue_shared = qs_pa; ds->queue_shared_len = qs_len; ds->mtu = VMXNET3_MAX_MTU; @@ -546,8 +545,8 @@ vmxnet3_alloc_txring(struct vmxnet3_soft ts->cmd_ring_len = NTXDESC; ts->comp_ring = comp_pa; ts->comp_ring_len = NTXCOMPDESC; - ts->driver_data = vtophys(tq); - ts->driver_data_len = sizeof *tq; + ts->driver_data = ~0ULL; + ts->driver_data_len = 0; ts->intr_idx = intr; ts->stopped = 1; ts->error = 0; @@ -598,8 +597,8 @@ vmxnet3_alloc_rxring(struct vmxnet3_soft rs->cmd_ring_len[1] = NRXDESC; rs->comp_ring = comp_pa; rs->comp_ring_len = NRXCOMPDESC; - rs->driver_data = vtophys(rq); - rs->driver_data_len = sizeof *rq; + rs->driver_data = ~0ULL; + rs->driver_data_len = 0; rs->intr_idx = intr; rs->stopped = 1; rs->error = 0;
vmx(4): remove useless code
Hi, The following diff removes useless code from the driver. As discussed here [1] and committed there [2], the hypervisor doesn't do anything with the data structures. We even just set NULL to the pointer since the initial commit of vmx(4). So, I guess it is better to remove all of these. The variables are bzero'd in vmxnet3_dma_allocmem() anyway. OK? bye, Jan [1]: https://www.lkml.org/lkml/2021/1/19/1225 [2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/net/vmxnet3/vmxnet3_drv.c?id=de1da8bcf40564a2adada2d5d5426e05355f66e8 Index: dev/pci/if_vmx.c === RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v retrieving revision 1.66 diff -u -p -r1.66 if_vmx.c --- dev/pci/if_vmx.c23 Jul 2021 00:29:14 - 1.66 +++ dev/pci/if_vmx.c5 Aug 2021 11:12:26 - @@ -157,7 +157,6 @@ struct vmxnet3_softc { #define WRITE_BAR1(sc, reg, val) \ bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val) #define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd) -#define vtophys(va) 0 /* XXX ok? */ int vmxnet3_match(struct device *, void *, void *); void vmxnet3_attach(struct device *, struct device *, void *); @@ -468,8 +467,6 @@ vmxnet3_dma_init(struct vmxnet3_softc *s ds->vmxnet3_revision = 1; ds->upt_version = 1; ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN; - ds->driver_data = vtophys(sc); - ds->driver_data_len = sizeof(struct vmxnet3_softc); ds->queue_shared = qs_pa; ds->queue_shared_len = qs_len; ds->mtu = VMXNET3_MAX_MTU; @@ -546,8 +543,6 @@ vmxnet3_alloc_txring(struct vmxnet3_soft ts->cmd_ring_len = NTXDESC; ts->comp_ring = comp_pa; ts->comp_ring_len = NTXCOMPDESC; - ts->driver_data = vtophys(tq); - ts->driver_data_len = sizeof *tq; ts->intr_idx = intr; ts->stopped = 1; ts->error = 0; @@ -598,8 +593,6 @@ vmxnet3_alloc_rxring(struct vmxnet3_soft rs->cmd_ring_len[1] = NRXDESC; rs->comp_ring = comp_pa; rs->comp_ring_len = NRXCOMPDESC; - rs->driver_data = vtophys(rq); - rs->driver_data_len = sizeof *rq; rs->intr_idx = intr; rs->stopped = 1; rs->error = 0;
Fix: tcp_output window calculation error
Hi, This calculation of the receive window has a logic error: If win is 0 it will be overwritten by (rcv_adv - rcv_nxt). Thus, win will be (rcv_adv - rcv_nxt) even if it is below (sb_hiwat / 4). We could just remove the dead (sb_hiwat / 4) code, or reorder the conditions to keep the original feature. OK? bye, Jan Index: netinet/tcp_output.c === RCS file: /cvs/src/sys/netinet/tcp_output.c,v retrieving revision 1.130 diff -u -p -r1.130 tcp_output.c --- netinet/tcp_output.c8 Feb 2021 19:37:15 - 1.130 +++ netinet/tcp_output.c22 Jul 2021 12:33:13 - @@ -812,12 +812,12 @@ send: * Calculate receive window. Don't shrink window, * but avoid silly window syndrome. */ - if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg) - win = 0; if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt)) win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt); + if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg) + win = 0; if (flags & TH_RST) win = 0; th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));
ftpd(8): Convert K&R function definitions to modern C
Hi, Convert K&R function definitions to modern C. OK? bye, Jan Index: ftpcmd.y === RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v retrieving revision 1.72 diff -u -p -r1.72 ftpcmd.y --- ftpcmd.y23 May 2021 17:01:21 - 1.72 +++ ftpcmd.y30 May 2021 15:32:50 - @@ -1072,9 +1072,7 @@ static int yylex(void); extern int epsvall; static struct tab * -lookup(p, cmd) - struct tab *p; - const char *cmd; +lookup(struct tab *p, const char *cmd) { for (; p->name != NULL; p++) @@ -1089,9 +1087,7 @@ lookup(p, cmd) * get_line - a hacked up version of fgets to ignore TELNET escape codes. */ int -get_line(s, n) - char *s; - int n; +get_line(char *s, int n) { int c; char *cs; @@ -1176,8 +1172,7 @@ get_line(s, n) /*ARGSUSED*/ void -toolong(signo) - int signo; +toolong(int signo) { struct syslog_data sdata = SYSLOG_DATA_INIT; @@ -1190,7 +1185,7 @@ toolong(signo) } static int -yylex() +yylex(void) { static int cpos; char *cp, *cp2; @@ -1429,8 +1424,7 @@ yylex() } void -upper(s) - char *s; +upper(char *s) { char *p; @@ -1439,9 +1433,7 @@ upper(s) } static void -help(ctab, s) - struct tab *ctab; - char *s; +help(struct tab *ctab, char *s) { struct tab *c; int width, NCMDS; @@ -1504,8 +1496,7 @@ help(ctab, s) } static void -sizecmd(filename) - const char *filename; +sizecmd(const char *filename) { switch (type) { case TYPE_L: Index: monitor.c === RCS file: /cvs/src/libexec/ftpd/monitor.c,v retrieving revision 1.28 diff -u -p -r1.28 monitor.c --- monitor.c 20 May 2021 15:21:03 - 1.28 +++ monitor.c 30 May 2021 15:38:52 - @@ -206,7 +206,7 @@ monitor_init(void) * for the user-privileged slave process and 1 for the monitor process. */ int -monitor_post_auth() +monitor_post_auth(void) { slave_pid = fork(); if (slave_pid == -1)
Re: ftpd(8): constify internal functions
ping? On Thu, May 13, 2021 at 04:44:56PM +0200, Jan Klemkow wrote: > ping? > > On Tue, May 04, 2021 at 10:50:50AM +0200, Jan Klemkow wrote: > > Hi, > > > > The following diff adds some missing consts for char * to the internal > > program functions. > > > > OK? > > > > bye, > > Jan > > > > Index: extern.h > > === > > RCS file: /cvs/src/libexec/ftpd/extern.h,v > > retrieving revision 1.21 > > diff -u -p -r1.21 extern.h > > --- extern.h15 Jan 2020 22:06:59 - 1.21 > > +++ extern.h4 May 2021 08:34:14 - > > @@ -64,38 +64,38 @@ > > void blkfree(char **); > > char **copyblk(char **); > > void cwd(char *); > > -void delete(char *); > > +void delete(const char *); > > void dologout(int); > > -void fatal(char *); > > +void fatal(const char *); > > intftpd_pclose(FILE *, pid_t); > > FILE *ftpd_ls(const char *, pid_t *); > > int get_line(char *, int, FILE *); > > -void ftpdlogwtmp(char *, char *, char *); > > +void ftpdlogwtmp(const char *, const char *, const char *); > > void lreply(int, const char *, ...); > > -void makedir(char *); > > -void nack(char *); > > +void makedir(const char *); > > +void nack(const char *); > > enum auth_ret > > pass(char *); > > void passive(void); > > intepsvproto2af(int); > > intaf2epsvproto(int); > > -void long_passive(char *, int); > > +void long_passive(const char *, int); > > intextended_port(const char *); > > void epsv_protounsupp(const char *); > > -void perror_reply(int, char *); > > +void perror_reply(int, const char *); > > void pwd(void); > > -void removedir(char *); > > -void renamecmd(char *, char *); > > +void removedir(const char *); > > +void renamecmd(const char *, const char *); > > char *renamefrom(char *); > > void reply(int, const char *, ...); > > void reply_r(int, const char *, ...); > > enum ret_cmd { RET_FILE, RET_LIST }; > > -void retrieve(enum ret_cmd, char *); > > +void retrieve(enum ret_cmd, const char *); > > void send_file_list(char *); > > void setproctitle(const char *, ...); > > void statcmd(void); > > -void 
statfilecmd(char *); > > -void store(char *, char *, int); > > +void statfilecmd(const char *); > > +void store(const char *, const char *, int); > > void upper(char *); > > void user(char *); > > void yyerror(char *); > > Index: ftpcmd.y > > === > > RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v > > retrieving revision 1.69 > > diff -u -p -r1.69 ftpcmd.y > > --- ftpcmd.y4 Mar 2020 20:17:48 - 1.69 > > +++ ftpcmd.y4 May 2021 08:34:14 - > > @@ -1065,8 +1065,8 @@ struct tab sitetab[] = { > > > > static void help(struct tab *, char *); > > static struct tab * > > -lookup(struct tab *, char *); > > -static void sizecmd(char *); > > +lookup(struct tab *, const char *); > > +static void sizecmd(const char *); > > static int yylex(void); > > > > extern int epsvall; > > @@ -1074,7 +1074,7 @@ extern int epsvall; > > static struct tab * > > lookup(p, cmd) > > struct tab *p; > > - char *cmd; > > + const char *cmd; > > { > > > > for (; p->name != NULL; p++) > > @@ -1508,7 +1506,7 @@ help(ctab, s) > > > > static void > > sizecmd(filename) > > - char *filename; > > + const char *filename; > > { > > switch (type) { > > case TYPE_L: > > Index: ftpd.c > > === > > RCS file: /cvs/src/libexec/ftpd/ftpd.c,v > > retrieving revision 1.229 > > diff -u -p -r1.229 ftpd.c > > --- ftpd.c 15 Jan 2020 22:06:59 - 1.229 > > +++ ftpd.c 4 May 2021 08:34:14 - > > @@ -191,28 +191,28 @@ char proctitle[BUFSIZ]; /* initial part > > (long long)(cnt)); \ > > } > > > > -static void ack(char *); > > +static void ack(const char *); > > static void sigurg(int); > >
Re: ftpd(8): remove double fflush(3) calls
ping? On Thu, May 13, 2021 at 04:44:23PM +0200, Jan Klemkow wrote: > ping? > > On Wed, May 05, 2021 at 04:42:49PM +0200, Jan Klemkow wrote: > > Hi, > > > > The function lreply() already calls fflush(3) on stdout. So, this calls > > are useless. > > > > OK? > > > > bye, > > Jan > > > > Index: ftpd.c > > === > > RCS file: /cvs/src/libexec/ftpd/ftpd.c,v > > retrieving revision 1.229 > > diff -u -p -r1.229 ftpd.c > > --- ftpd.c 15 Jan 2020 22:06:59 - 1.229 > > +++ ftpd.c 5 May 2021 14:39:25 - > > @@ -568,7 +568,6 @@ main(int argc, char *argv[]) > > line[strcspn(line, "\n")] = '\0'; > > lreply(530, "%s", line); > > } > > - (void) fflush(stdout); > > (void) fclose(fp); > > reply(530, "System not available."); > > exit(0); > > @@ -578,7 +577,6 @@ main(int argc, char *argv[]) > > line[strcspn(line, "\n")] = '\0'; > > lreply(220, "%s", line); > > } > > - (void) fflush(stdout); > > (void) fclose(fp); > > /* reply(220,) must follow */ > > } > > @@ -1078,7 +1076,6 @@ pass(char *passwd) > > line[strcspn(line, "\n")] = '\0'; > > lreply(230, "%s", line); > > } > > - (void) fflush(stdout); > > (void) fclose(fp); > > } > > free(motd); > > @@ -2029,7 +2026,6 @@ cwd(char *path) > > line[strcspn(line, "\n")] = '\0'; > > lreply(250, "%s", line); > > } > > - (void) fflush(stdout); > > (void) fclose(message); > > } > > ack("CWD"); > > >
Re: ftpd(8): remove useless islower(3) in upper()
ping? On Thu, May 13, 2021 at 04:45:14PM +0200, Jan Klemkow wrote: > ping? > > On Sat, May 01, 2021 at 11:19:56AM +0200, Jan Klemkow wrote: > > Hi, > > > > This cleanup diff, removes a useless if islower(3) from the loop. It is > > guarantee by toupper(3) that no character will be changed if its not a > > lower one. > > > > man toupper(3): > > The toupper() and toupper_l() functions convert a lower-case letter > > to the corresponding upper-case letter. The _toupper() function is > > identical to toupper() except that c must be a lower-case letter. > > > > POSIX: > > ... > > All other arguments in the domain are returned unchanged. > > ... > > > > OK? > > > > bye, > > Jan > > > > Index: ftpcmd.y > > === > > RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v > > retrieving revision 1.69 > > diff -u -p -r1.69 ftpcmd.y > > --- ftpcmd.y4 Mar 2020 20:17:48 - 1.69 > > +++ ftpcmd.y1 May 2021 09:09:46 - > > @@ -1435,10 +1435,8 @@ upper(s) > > { > > char *p; > > > > - for (p = s; *p; p++) { > > - if (islower((unsigned char)*p)) > > - *p = (char)toupper((unsigned char)*p); > > - } > > + for (p = s; *p; p++) > > + *p = (char)toupper((unsigned char)*p); > > } > > > > static void > >
Re: snmpd rename context to pdutype
On Fri, May 07, 2021 at 04:18:50PM +0200, Martijn van Duren wrote: > When moving the traphandler to the snmpe process I overlooked the fact > that "type" is being saved inside the switch statement under the > sm_context name. RFC3411 talks about pduType, and the name context means > something completely different in the v3 world. > > The diff below moves our naming closer to the RFCs, which should > hopefully prevent further confusion in the future. > > While here I made the debug output print the pduType in a human readable > format. > > The invalid varbind check can be simplified a simple "{}" in the > ober_scanf_elements allowing me to just drop the type variable. > > OK? I tested it and the diff looks good and legit for me. > martijn@ > > Index: snmp.h > === > RCS file: /cvs/src/usr.sbin/snmpd/snmp.h,v > retrieving revision 1.16 > diff -u -p -r1.16 snmp.h > --- snmp.h30 Jun 2020 17:11:49 - 1.16 > +++ snmp.h7 May 2021 14:17:12 - > @@ -77,7 +77,7 @@ enum snmp_version { > SNMP_V3 = 3 > }; > > -enum snmp_context { > +enum snmp_pdutype { > SNMP_C_GETREQ = 0, > SNMP_C_GETNEXTREQ = 1, > SNMP_C_GETRESP = 2, > Index: snmpd.h > === > RCS file: /cvs/src/usr.sbin/snmpd/snmpd.h,v > retrieving revision 1.94 > diff -u -p -r1.94 snmpd.h > --- snmpd.h 5 Feb 2021 10:30:45 - 1.94 > +++ snmpd.h 7 May 2021 14:17:12 - > @@ -384,7 +384,7 @@ struct snmp_message { > socklen_tsm_slen; > int sm_sock_tcp; > int sm_aflags; > - int sm_type; > + enum snmp_pdutypesm_pdutype; > struct event sm_sockev; > char sm_host[HOST_NAME_MAX+1]; > in_port_tsm_port; > @@ -405,7 +405,6 @@ struct snmp_message { > > /* V1, V2c */ > char sm_community[SNMPD_MAXCOMMUNITYLEN]; > - int sm_context; > > /* V3 */ > long longsm_msgid; > Index: snmpe.c > === > RCS file: /cvs/src/usr.sbin/snmpd/snmpe.c,v > retrieving revision 1.70 > diff -u -p -r1.70 snmpe.c > --- snmpe.c 22 Feb 2021 11:31:09 - 1.70 > +++ snmpe.c 7 May 2021 14:17:12 - > @@ -41,6 +41,7 @@ > #include "mib.h" > > void snmpe_init(struct privsep *, struct 
privsep_proc *, void *); > +const char *snmpe_pdutype2string(enum snmp_pdutype); > int snmpe_parse(struct snmp_message *); > void snmpe_tryparse(int, struct snmp_message *); > int snmpe_parsevarbinds(struct snmp_message *); > @@ -194,6 +195,36 @@ snmpe_bind(struct address *addr) > return (-1); > } > > +const char * > +snmpe_pdutype2string(enum snmp_pdutype pdutype) > +{ > + static char unknown[sizeof("Unknown (4294967295)")]; > + > + switch (pdutype) { > + case SNMP_C_GETREQ: > + return "GetRequest"; > + case SNMP_C_GETNEXTREQ: > + return "GetNextRequest"; > + case SNMP_C_GETRESP: > + return "Response"; > + case SNMP_C_SETREQ: > + return "SetRequest"; > + case SNMP_C_TRAP: > + return "Trap"; > + case SNMP_C_GETBULKREQ: > + return "GetBulkRequest"; > + case SNMP_C_INFORMREQ: > + return "InformRequest"; > + case SNMP_C_TRAPV2: > + return "SNMPv2-Trap"; > + case SNMP_C_REPORT: > + return "Report"; > + } > + > + snprintf(unknown, sizeof(unknown), "Unknown (%u)", pdutype); > + return unknown; > +} > + > int > snmpe_parse(struct snmp_message *msg) > { > @@ -202,7 +233,6 @@ snmpe_parse(struct snmp_message *msg) > struct ber_element *a; > long longver, req; > long longerrval, erridx; > - unsigned int type; > u_intclass; > char*comn; > char*flagstr, *ctxname; > @@ -271,15 +301,15 @@ snmpe_parse(struct snmp_message *msg) > goto fail; > } > > - if (ober_scanf_elements(msg->sm_pdu, "t{e", &class, &type, &a) != 0) > + if (ober_scanf_elements(msg->sm_pdu, "t{e", &class, &(msg->sm_pdutype), > + &a) != 0) > goto parsefail; > > /* SNMP PDU context */ > if (class != BER_CLASS_CONTEXT) > goto parsefail; > > - msg->sm_type = type; > - switch (type) { > + switch (msg->sm_pdutype) { > case SNMP_C_GETBULKREQ: > if (msg->sm_version == SNMP_V1) { > stats->snmp_inbadversions++; > @@ -294,7 +324,7 @@ snmpe_parse(struct snmp_message *msg) > /* FALLTHROUGH */ > > case SNMP_C_GETNEXTREQ: > - if (typ
ftpd(8): remove useless parameter of get_line()
Hi, This diff removes the useless FILE* parameter of get_line(). In every call this parameter is always "stdin". Thus, we can replace every use of the variable iop with stdin. Like every other diff, I tested this diff with the ftpd regression tests. OK? bye, Jan Index: extern.h === RCS file: /cvs/src/libexec/ftpd/extern.h,v retrieving revision 1.21 diff -u -p -r1.21 extern.h --- extern.h15 Jan 2020 22:06:59 - 1.21 +++ extern.h16 May 2021 15:36:27 - @@ -69,7 +69,7 @@ void dologout(int); void fatal(char *); intftpd_pclose(FILE *, pid_t); FILE *ftpd_ls(const char *, pid_t *); -int get_line(char *, int, FILE *); +int get_line(char *, int); void ftpdlogwtmp(char *, char *, char *); void lreply(int, const char *, ...); void makedir(char *); Index: ftpcmd.y === RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v retrieving revision 1.69 diff -u -p -r1.69 ftpcmd.y --- ftpcmd.y4 Mar 2020 20:17:48 - 1.69 +++ ftpcmd.y16 May 2021 15:38:07 - @@ -1089,10 +1089,9 @@ lookup(p, cmd) * get_line - a hacked up version of fgets to ignore TELNET escape codes. */ int -get_line(s, n, iop) +get_line(s, n) char *s; int n; - FILE *iop; { int c; char *cs; @@ -1111,21 +1110,21 @@ get_line(s, n, iop) if (c == 0) tmpline[0] = '\0'; } - while ((c = getc(iop)) != EOF) { + while ((c = getc(stdin)) != EOF) { c &= 0377; if (c == IAC) { - if ((c = getc(iop)) != EOF) { + if ((c = getc(stdin)) != EOF) { c &= 0377; switch (c) { case WILL: case WONT: - c = getc(iop); + c = getc(stdin); printf("%c%c%c", IAC, DONT, 0377&c); (void) fflush(stdout); continue; case DO: case DONT: - c = getc(iop); + c = getc(stdin); printf("%c%c%c", IAC, WONT, 0377&c); (void) fflush(stdout); continue; @@ -1144,7 +1143,7 @@ get_line(s, n, iop) * This prevents the command to be split up into * multiple commands.
*/ - while (c != '\n' && (c = getc(iop)) != EOF) + while (c != '\n' && (c = getc(stdin)) != EOF) ; return (-2); } @@ -1204,7 +1203,7 @@ yylex() case CMD: (void) alarm((unsigned) timeout); - n = get_line(cbuf, sizeof(cbuf)-1, stdin); + n = get_line(cbuf, sizeof(cbuf)-1); if (n == -1) { reply(221, "You could at least say goodbye."); dologout(0); Index: ftpd.c === RCS file: /cvs/src/libexec/ftpd/ftpd.c,v retrieving revision 1.229 diff -u -p -r1.229 ftpd.c --- ftpd.c 15 Jan 2020 22:06:59 - 1.229 +++ ftpd.c 16 May 2021 15:44:17 - @@ -2179,7 +2179,7 @@ myoob(void) if (!transflag) return; cp = tmpline; - ret = get_line(cp, sizeof(tmpline)-1, stdin); + ret = get_line(cp, sizeof(tmpline)-1); if (ret == -1) { reply(221, "You could at least say goodbye."); dologout(0);
Re: ftpd(8): add pledge(2)
On Thu, May 13, 2021 at 10:40:40AM -0600, Theo de Raadt wrote: > + if (pledge("stdio rpath inet recvfd sendfd " > + "wpath cpath proc tty getpw", NULL) == -1) > > Please change the order: > > stdio rpath wpath cpath inet recvfd sendfd proc tty getpw > > (It remains extremely permissive). Yes. Further refactoring may reduce the needed syscalls in the future? OK? Thanks, Jan Index: monitor.c === RCS file: /cvs/src/libexec/ftpd/monitor.c,v retrieving revision 1.26 diff -u -p -r1.26 monitor.c --- monitor.c 28 Jun 2019 13:32:53 - 1.26 +++ monitor.c 13 May 2021 17:12:18 - @@ -295,11 +295,17 @@ handle_cmds(void) sizeof(slavequit)); break; case AUTH_SLAVE: + if (pledge("stdio rpath wpath cpath inet recvfd" + " sendfd proc tty getpw", NULL) == -1) + fatalx("pledge"); /* User-privileged slave */ debugmsg("user-privileged slave started"); return; /* NOTREACHED */ case AUTH_MONITOR: + if (pledge("stdio inet sendfd recvfd proc", + NULL) == -1) + fatalx("pledge"); /* Post-auth monitor */ debugmsg("monitor went into post-auth phase"); state = POSTAUTH;
Re: ftpd(8): constify internal functions
ping? On Tue, May 04, 2021 at 10:50:50AM +0200, Jan Klemkow wrote: > Hi, > > The following diff adds some missing consts for char * to the internal > program functions. > > OK? > > bye, > Jan > > Index: extern.h > === > RCS file: /cvs/src/libexec/ftpd/extern.h,v > retrieving revision 1.21 > diff -u -p -r1.21 extern.h > --- extern.h 15 Jan 2020 22:06:59 - 1.21 > +++ extern.h 4 May 2021 08:34:14 - > @@ -64,38 +64,38 @@ > void blkfree(char **); > char **copyblk(char **); > void cwd(char *); > -void delete(char *); > +void delete(const char *); > void dologout(int); > -void fatal(char *); > +void fatal(const char *); > int ftpd_pclose(FILE *, pid_t); > FILE *ftpd_ls(const char *, pid_t *); > int get_line(char *, int, FILE *); > -void ftpdlogwtmp(char *, char *, char *); > +void ftpdlogwtmp(const char *, const char *, const char *); > void lreply(int, const char *, ...); > -void makedir(char *); > -void nack(char *); > +void makedir(const char *); > +void nack(const char *); > enum auth_ret > pass(char *); > void passive(void); > int epsvproto2af(int); > int af2epsvproto(int); > -void long_passive(char *, int); > +void long_passive(const char *, int); > int extended_port(const char *); > void epsv_protounsupp(const char *); > -void perror_reply(int, char *); > +void perror_reply(int, const char *); > void pwd(void); > -void removedir(char *); > -void renamecmd(char *, char *); > +void removedir(const char *); > +void renamecmd(const char *, const char *); > char *renamefrom(char *); > void reply(int, const char *, ...); > void reply_r(int, const char *, ...); > enum ret_cmd { RET_FILE, RET_LIST }; > -void retrieve(enum ret_cmd, char *); > +void retrieve(enum ret_cmd, const char *); > void send_file_list(char *); > void setproctitle(const char *, ...); > void statcmd(void); > -void statfilecmd(char *); > -void store(char *, char *, int); > +void statfilecmd(const char *); > +void store(const char *, const char *, int); > void upper(char *); > void user(char *); > void 
yyerror(char *); > Index: ftpcmd.y > === > RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v > retrieving revision 1.69 > diff -u -p -r1.69 ftpcmd.y > --- ftpcmd.y 4 Mar 2020 20:17:48 - 1.69 > +++ ftpcmd.y 4 May 2021 08:34:14 - > @@ -1065,8 +1065,8 @@ struct tab sitetab[] = { > > static void help(struct tab *, char *); > static struct tab * > - lookup(struct tab *, char *); > -static void sizecmd(char *); > + lookup(struct tab *, const char *); > +static void sizecmd(const char *); > static intyylex(void); > > extern int epsvall; > @@ -1074,7 +1074,7 @@ extern int epsvall; > static struct tab * > lookup(p, cmd) > struct tab *p; > - char *cmd; > + const char *cmd; > { > > for (; p->name != NULL; p++) > @@ -1508,7 +1506,7 @@ help(ctab, s) > > static void > sizecmd(filename) > - char *filename; > + const char *filename; > { > switch (type) { > case TYPE_L: > Index: ftpd.c > === > RCS file: /cvs/src/libexec/ftpd/ftpd.c,v > retrieving revision 1.229 > diff -u -p -r1.229 ftpd.c > --- ftpd.c15 Jan 2020 22:06:59 - 1.229 > +++ ftpd.c4 May 2021 08:34:14 - > @@ -191,28 +191,28 @@ charproctitle[BUFSIZ]; /* initial part > (long long)(cnt)); \ > } > > -static void ack(char *); > +static void ack(const char *); > static void sigurg(int); > static void myoob(void); > -static intcheckuser(char *, char *); > -static FILE *dataconn(char *, off_t, char *); > +static intcheckuser(char *, const char *); > +static FILE *dataconn(const char *, off_t, char *); > static void dolog(struct sockaddr *); > static char *copy_dir(char *, struct passwd *); > static char *curdir(void); > static void end_login(void); > static FILE *getdatasock(char *); > -static intguniquefd(char *, char **); > +static intguniquefd(const char *, char **); > static void lostconn(int); > static void sigquit(int); > static intreceive_data(FILE *, FILE *); > static void replydirname(const char *, const char *); > static intsend_data(FILE *, FILE *, off_t, off_t, int); > static struct passwd * > - sgetpwnam(char *, struct 
passwd *); > + sgetpwnam(const char *, struct passwd *); > static void
Re: ftpd(8): remove useless islower(3) in upper()
ping? On Sat, May 01, 2021 at 11:19:56AM +0200, Jan Klemkow wrote: > Hi, > > This cleanup diff, removes a useless if islower(3) from the loop. It is > guarantee by toupper(3) that no character will be changed if its not a > lower one. > > man toupper(3): > The toupper() and toupper_l() functions convert a lower-case letter > to the corresponding upper-case letter. The _toupper() function is > identical to toupper() except that c must be a lower-case letter. > > POSIX: > ... > All other arguments in the domain are returned unchanged. > ... > > OK? > > bye, > Jan > > Index: ftpcmd.y > === > RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v > retrieving revision 1.69 > diff -u -p -r1.69 ftpcmd.y > --- ftpcmd.y 4 Mar 2020 20:17:48 - 1.69 > +++ ftpcmd.y 1 May 2021 09:09:46 - > @@ -1435,10 +1435,8 @@ upper(s) > { > char *p; > > - for (p = s; *p; p++) { > - if (islower((unsigned char)*p)) > - *p = (char)toupper((unsigned char)*p); > - } > + for (p = s; *p; p++) > + *p = (char)toupper((unsigned char)*p); > } > > static void >
Re: ftpd(8): remove double fflush(3) calls
ping? On Wed, May 05, 2021 at 04:42:49PM +0200, Jan Klemkow wrote: > Hi, > > The function lreply() already calls fflush(3) on stdout. So, this calls > are useless. > > OK? > > bye, > Jan > > Index: ftpd.c > === > RCS file: /cvs/src/libexec/ftpd/ftpd.c,v > retrieving revision 1.229 > diff -u -p -r1.229 ftpd.c > --- ftpd.c15 Jan 2020 22:06:59 - 1.229 > +++ ftpd.c5 May 2021 14:39:25 - > @@ -568,7 +568,6 @@ main(int argc, char *argv[]) > line[strcspn(line, "\n")] = '\0'; > lreply(530, "%s", line); > } > - (void) fflush(stdout); > (void) fclose(fp); > reply(530, "System not available."); > exit(0); > @@ -578,7 +577,6 @@ main(int argc, char *argv[]) > line[strcspn(line, "\n")] = '\0'; > lreply(220, "%s", line); > } > - (void) fflush(stdout); > (void) fclose(fp); > /* reply(220,) must follow */ > } > @@ -1078,7 +1076,6 @@ pass(char *passwd) > line[strcspn(line, "\n")] = '\0'; > lreply(230, "%s", line); > } > - (void) fflush(stdout); > (void) fclose(fp); > } > free(motd); > @@ -2029,7 +2026,6 @@ cwd(char *path) > line[strcspn(line, "\n")] = '\0'; > lreply(250, "%s", line); > } > - (void) fflush(stdout); > (void) fclose(message); > } > ack("CWD"); >
ftpd(8): add pledge(2)
Hi, This is the first attempt to bring pledge into ftpd. The main ftpd process can't use pledge for now because of possible chroot(2) calls. But, the two forks after user login are pledged with this diff. I tested it manually and with the ftpd's regression tests. OK? bye, Jan Index: monitor.c === RCS file: /cvs/src/libexec/ftpd/monitor.c,v retrieving revision 1.26 diff -u -p -r1.26 monitor.c --- monitor.c 28 Jun 2019 13:32:53 - 1.26 +++ monitor.c 13 May 2021 14:31:35 - @@ -295,11 +295,17 @@ handle_cmds(void) sizeof(slavequit)); break; case AUTH_SLAVE: + if (pledge("stdio rpath inet recvfd sendfd " + "wpath cpath proc tty getpw", NULL) == -1) + fatalx("pledge"); /* User-privileged slave */ debugmsg("user-privileged slave started"); return; /* NOTREACHED */ case AUTH_MONITOR: + if (pledge("stdio inet sendfd recvfd proc", + NULL) == -1) + fatalx("pledge"); /* Post-auth monitor */ debugmsg("monitor went into post-auth phase"); state = POSTAUTH;
Re: services(5): add default ftps ports
On Thu, May 06, 2021 at 11:09:03AM -0600, Theo de Raadt wrote: > Jan Klemkow wrote: > > > > > > I'm working on a diff to bring ftps with libtls into our ftpd(8). > > > > > There > > > > > is a "getaddrinfo(NULL, "ftps", &hints, &res0)" call, which uses this > > > > > port. Thus, I made this change. > > > > > > > > Hang on -- does the world want ftps support? > > > > I don't know, what "the world" wants. But, I want ftps. As far as I > > can see, ftps is the only way to bring our ftpd(8) into the 21st > > century. > > I have a really hard time with that. > > The protocol is completely broken, and in a way that adding TLS makes it > even worse. OK. And what should we do with ftpd(8)? I see just three ways: 1. Prepare it for usage in modern internet with crypto support. 2. Just use it for anonymous public file distribution. 3. Remove the daemon. In my opinion the protocol is not that bad and our daemon just needs some refactoring and encryption support.
Re: services(5): add default ftps ports
On Thu, May 06, 2021 at 06:36:52PM +0200, Mark Kettenis wrote: > > From: "Theo de Raadt" > > Date: Thu, 06 May 2021 10:26:31 -0600 > > > > Jan Klemkow wrote: > > > > > On Wed, May 05, 2021 at 12:18:43PM -0600, Theo de Raadt wrote: > > > > I would like a further justification for removing these ports from > > > > the very limited dynamic reserved space used by bindresvport. > > > > > > > > (but not by rresvport, which appears still stomp over them) > > > > > > > > For tcp, 32 of the 512 are locked out. > > > > For udp, 19. > > > > > > > > What software is actually using these ports? > > > > > > > > Is that software irrelevant these days? > > > > > > I'm working on a diff to bring ftps with libtls into our ftpd(8). There > > > is a "getaddrinfo(NULL, "ftps", &hints, &res0)" call, which uses this > > > port. Thus, I made this change. > > > > Hang on -- does the world want ftps support? I don't know, what "the world" wants. But, I want ftps. As far as I can see, ftps is the only way to bring our ftpd(8) into the 21st century. I use ftp in my private local setup. I also want to use it over the public internet in the future, like I did in the past. That's why I'm working on it. > I was going to ask the same thing. I mean even with encryption the > FTP protocol still is a bad idea given all the problems with NAT > traversal and such. I don't use NAT or packet filters in my setup. With IPv6 there is no active FTP problem.
Re: services(5): add default ftps ports
On Wed, May 05, 2021 at 12:18:43PM -0600, Theo de Raadt wrote: > I would like a further justification for removing these ports from > the very limited dynamic reserved space used by bindresvport. > > (but not by rresvport, which appears still stomp over them) > > For tcp, 32 of the 512 are locked out. > For udp, 19. > > What software is actually using these ports? > > Is that software irrelevant these days? I'm working on a diff to bring ftps with libtls into our ftpd(8). There is a "getaddrinfo(NULL, "ftps", &hints, &res0)" call, which uses this port. Thus, I made this change. > Jan Klemkow wrote: > > On Wed, May 05, 2021 at 11:09:12AM +0100, Stuart Henderson wrote: > > > On 2021/05/04 12:07, Jan Klemkow wrote: > > > > Add missing ftps defaults ports to servies(5). > > > > > > > > Index: services > > > > === > > > > RCS file: /cvs/src/etc/services,v > > > > retrieving revision 1.99 > > > > diff -u -p -r1.99 services > > > > --- services18 Feb 2021 02:30:29 - 1.99 > > > > +++ services4 May 2021 10:01:35 - > > > > @@ -318,6 +318,10 @@ krb_prop 754/tcp hprop # > > > > Kerberos slav > > > > krbupdate 760/tcp kreg# BSD Kerberos > > > > registration > > > > supfilesrv 871/tcp # SUP server > > > > swat 901/tcp # Samba Web > > > > Administration Tool > > > > +ftps-data 989/tcp # ftp data over TLS/SSL > > > > +ftps-data 989/udp # ftp data over TLS/SSL > > > > +ftps 990/tcp # ftp control over > > > > TLS/SSL > > > > +ftps 990/udp # ftp control over > > > > TLS/SSL > > > > > > I'm OK with adding the TCP ones (though ftp-over-tls always makes me > > > want to rant...). It's not going to run on UDP though so I think those > > > should not be added. > > > > OK? 
> > > > Index: services > > === > > RCS file: /cvs/src/etc/services,v > > retrieving revision 1.99 > > diff -u -p -r1.99 services > > --- services18 Feb 2021 02:30:29 - 1.99 > > +++ services5 May 2021 12:24:29 - > > @@ -318,6 +318,8 @@ krb_prop754/tcp hprop # > > Kerberos slav > > krbupdate 760/tcp kreg# BSD Kerberos registration > > supfilesrv 871/tcp # SUP server > > swat 901/tcp # Samba Web > > Administration Tool > > +ftps-data 989/tcp # ftp data over TLS > > +ftps 990/tcp # ftp control over TLS > > supfiledbg 1127/tcp# SUP debugging > > support1529/tcp# GNATS, cygnus bug > > tracker > > datametrics1645/udp > > >
ftpd(8): remove double fflush(3) calls
Hi, The function lreply() already calls fflush(3) on stdout. So, these calls are useless. OK? bye, Jan Index: ftpd.c === RCS file: /cvs/src/libexec/ftpd/ftpd.c,v retrieving revision 1.229 diff -u -p -r1.229 ftpd.c --- ftpd.c 15 Jan 2020 22:06:59 - 1.229 +++ ftpd.c 5 May 2021 14:39:25 - @@ -568,7 +568,6 @@ main(int argc, char *argv[]) line[strcspn(line, "\n")] = '\0'; lreply(530, "%s", line); } - (void) fflush(stdout); (void) fclose(fp); reply(530, "System not available."); exit(0); @@ -578,7 +577,6 @@ main(int argc, char *argv[]) line[strcspn(line, "\n")] = '\0'; lreply(220, "%s", line); } - (void) fflush(stdout); (void) fclose(fp); /* reply(220,) must follow */ } @@ -1078,7 +1076,6 @@ pass(char *passwd) line[strcspn(line, "\n")] = '\0'; lreply(230, "%s", line); } - (void) fflush(stdout); (void) fclose(fp); } free(motd); @@ -2029,7 +2026,6 @@ cwd(char *path) line[strcspn(line, "\n")] = '\0'; lreply(250, "%s", line); } - (void) fflush(stdout); (void) fclose(message); } ack("CWD");
Re: services(5): add default ftps ports
On Wed, May 05, 2021 at 11:09:12AM +0100, Stuart Henderson wrote: > On 2021/05/04 12:07, Jan Klemkow wrote: > > Add missing ftps defaults ports to servies(5). > > > > Index: services > > === > > RCS file: /cvs/src/etc/services,v > > retrieving revision 1.99 > > diff -u -p -r1.99 services > > --- services18 Feb 2021 02:30:29 - 1.99 > > +++ services4 May 2021 10:01:35 - > > @@ -318,6 +318,10 @@ krb_prop 754/tcp hprop # > > Kerberos slav > > krbupdate 760/tcp kreg# BSD Kerberos registration > > supfilesrv 871/tcp # SUP server > > swat 901/tcp # Samba Web > > Administration Tool > > +ftps-data 989/tcp # ftp data over TLS/SSL > > +ftps-data 989/udp # ftp data over TLS/SSL > > +ftps 990/tcp # ftp control over > > TLS/SSL > > +ftps 990/udp # ftp control over > > TLS/SSL > > I'm OK with adding the TCP ones (though ftp-over-tls always makes me > want to rant...). It's not going to run on UDP though so I think those > should not be added. OK? Index: services === RCS file: /cvs/src/etc/services,v retrieving revision 1.99 diff -u -p -r1.99 services --- services18 Feb 2021 02:30:29 - 1.99 +++ services5 May 2021 12:24:29 - @@ -318,6 +318,8 @@ krb_prop754/tcp hprop # Kerberos slav krbupdate 760/tcp kreg# BSD Kerberos registration supfilesrv 871/tcp # SUP server swat 901/tcp # Samba Web Administration Tool +ftps-data 989/tcp # ftp data over TLS +ftps 990/tcp # ftp control over TLS supfiledbg 1127/tcp# SUP debugging support1529/tcp# GNATS, cygnus bug tracker datametrics1645/udp
services(5): add default ftps ports
Hi, Add the missing ftps default ports to services(5). OK? bye, Jan Index: services === RCS file: /cvs/src/etc/services,v retrieving revision 1.99 diff -u -p -r1.99 services --- services18 Feb 2021 02:30:29 - 1.99 +++ services4 May 2021 10:01:35 - @@ -318,6 +318,10 @@ krb_prop 754/tcp hprop # Kerberos slav krbupdate 760/tcp kreg# BSD Kerberos registration supfilesrv 871/tcp # SUP server swat 901/tcp # Samba Web Administration Tool +ftps-data 989/tcp # ftp data over TLS/SSL +ftps-data 989/udp # ftp data over TLS/SSL +ftps 990/tcp # ftp control over TLS/SSL +ftps 990/udp # ftp control over TLS/SSL supfiledbg 1127/tcp# SUP debugging support1529/tcp# GNATS, cygnus bug tracker datametrics1645/udp
ftpd(8): constify internal functions
Hi, The following diff adds some missing consts for char * to the internal program functions. OK? bye, Jan Index: extern.h === RCS file: /cvs/src/libexec/ftpd/extern.h,v retrieving revision 1.21 diff -u -p -r1.21 extern.h --- extern.h15 Jan 2020 22:06:59 - 1.21 +++ extern.h4 May 2021 08:34:14 - @@ -64,38 +64,38 @@ void blkfree(char **); char **copyblk(char **); void cwd(char *); -void delete(char *); +void delete(const char *); void dologout(int); -void fatal(char *); +void fatal(const char *); intftpd_pclose(FILE *, pid_t); FILE *ftpd_ls(const char *, pid_t *); int get_line(char *, int, FILE *); -void ftpdlogwtmp(char *, char *, char *); +void ftpdlogwtmp(const char *, const char *, const char *); void lreply(int, const char *, ...); -void makedir(char *); -void nack(char *); +void makedir(const char *); +void nack(const char *); enum auth_ret pass(char *); void passive(void); intepsvproto2af(int); intaf2epsvproto(int); -void long_passive(char *, int); +void long_passive(const char *, int); intextended_port(const char *); void epsv_protounsupp(const char *); -void perror_reply(int, char *); +void perror_reply(int, const char *); void pwd(void); -void removedir(char *); -void renamecmd(char *, char *); +void removedir(const char *); +void renamecmd(const char *, const char *); char *renamefrom(char *); void reply(int, const char *, ...); void reply_r(int, const char *, ...); enum ret_cmd { RET_FILE, RET_LIST }; -void retrieve(enum ret_cmd, char *); +void retrieve(enum ret_cmd, const char *); void send_file_list(char *); void setproctitle(const char *, ...); void statcmd(void); -void statfilecmd(char *); -void store(char *, char *, int); +void statfilecmd(const char *); +void store(const char *, const char *, int); void upper(char *); void user(char *); void yyerror(char *); Index: ftpcmd.y === RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v retrieving revision 1.69 diff -u -p -r1.69 ftpcmd.y --- ftpcmd.y4 Mar 2020 20:17:48 - 1.69 +++ ftpcmd.y4 May 2021 08:34:14 - @@ 
-1065,8 +1065,8 @@ struct tab sitetab[] = { static void help(struct tab *, char *); static struct tab * -lookup(struct tab *, char *); -static void sizecmd(char *); +lookup(struct tab *, const char *); +static void sizecmd(const char *); static int yylex(void); extern int epsvall; @@ -1074,7 +1074,7 @@ extern int epsvall; static struct tab * lookup(p, cmd) struct tab *p; - char *cmd; + const char *cmd; { for (; p->name != NULL; p++) @@ -1508,7 +1506,7 @@ help(ctab, s) static void sizecmd(filename) - char *filename; + const char *filename; { switch (type) { case TYPE_L: Index: ftpd.c === RCS file: /cvs/src/libexec/ftpd/ftpd.c,v retrieving revision 1.229 diff -u -p -r1.229 ftpd.c --- ftpd.c 15 Jan 2020 22:06:59 - 1.229 +++ ftpd.c 4 May 2021 08:34:14 - @@ -191,28 +191,28 @@ char proctitle[BUFSIZ]; /* initial part (long long)(cnt)); \ } -static void ack(char *); +static void ack(const char *); static void sigurg(int); static void myoob(void); -static int checkuser(char *, char *); -static FILE*dataconn(char *, off_t, char *); +static int checkuser(char *, const char *); +static FILE*dataconn(const char *, off_t, char *); static void dolog(struct sockaddr *); static char*copy_dir(char *, struct passwd *); static char*curdir(void); static void end_login(void); static FILE*getdatasock(char *); -static int guniquefd(char *, char **); +static int guniquefd(const char *, char **); static void lostconn(int); static void sigquit(int); static int receive_data(FILE *, FILE *); static void replydirname(const char *, const char *); static int send_data(FILE *, FILE *, off_t, off_t, int); static struct passwd * -sgetpwnam(char *, struct passwd *); +sgetpwnam(const char *, struct passwd *); static void reapchild(int); static void usage(void); -voidlogxfer(char *, off_t, time_t); +voidlogxfer(const char *, off_t, time_t); voidset_slave_signals(void); static char * @@ -638,7 +638,7 @@ sigquit(int signo) * (e.g., globbing). 
*/ static struct passwd * -sgetpwnam(char *name, struct passwd *pw) +sgetpwnam(const char *name, struct passwd *pw) { static struct passwd *save; struct passwd *old; @@ -819,7 +819,7 @@ user(char *name) * Check if a user is in the file "fname" */ static int -checkuser(char *fname, char *name) +checkuser(char *fna
ftpd(8): remove useless islower(3) in upper()
Hi, This cleanup diff removes a useless islower(3) check from the loop. toupper(3) guarantees that a character is left unchanged if it is not a lower-case letter. man toupper(3): The toupper() and toupper_l() functions convert a lower-case letter to the corresponding upper-case letter. The _toupper() function is identical to toupper() except that c must be a lower-case letter. POSIX: ... All other arguments in the domain are returned unchanged. ... OK? bye, Jan Index: ftpcmd.y === RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v retrieving revision 1.69 diff -u -p -r1.69 ftpcmd.y --- ftpcmd.y4 Mar 2020 20:17:48 - 1.69 +++ ftpcmd.y1 May 2021 09:09:46 - @@ -1435,10 +1435,8 @@ upper(s) { char *p; - for (p = s; *p; p++) { - if (islower((unsigned char)*p)) - *p = (char)toupper((unsigned char)*p); - } + for (p = s; *p; p++) + *p = (char)toupper((unsigned char)*p); } static void
fyi: get HP EliteBook 830 G7/G8 booting
Hi, If you want to boot OpenBSD on an HP EliteBook 830 G7/G8, the bootloader will hang while loading the kernel. This happens because the UEFI firmware loads the bootloader at the same place in memory where the bootloader will copy the kernel, and we are unable to load the kernel at an arbitrary address. Thus, the following diff will help you get OpenBSD running on these machines. It moves the hardcoded kernel address to a free place. I don't intend to commit this. It's just a hint for people who are running into the same issue as I did. bye, Jan Index: arch/amd64/conf//ld.script === RCS file: /cvs/src/sys/arch/amd64/conf/ld.script,v retrieving revision 1.17 diff -u -p -r1.17 ld.script --- arch/amd64/conf//ld.script 7 Mar 2021 23:10:54 - 1.17 +++ arch/amd64/conf//ld.script 18 Mar 2021 21:11:18 - @@ -38,8 +38,8 @@ PHDRS */ __ALIGN_SIZE = 0x1000; __kernel_base = 0x8000; -__kernel_virt_base = __kernel_base + 0x100; -__kernel_phys_base = 0x100; +__kernel_virt_base = __kernel_base + 0x103; +__kernel_phys_base = 0x103; __kernel_virt_to_phys = __kernel_phys_base - __kernel_virt_base; ENTRY(start)
Re: vmm crash on 6.9-beta
Hi, I had the same issue a few days ago on a server of mine. I just ran 'cvs up'. So, it looks like a generic bug in FFS and not related to vmm. OpenBSD 6.9-beta (GENERIC.MP) #396: Thu Mar 11 19:15:56 MST 2021 dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP ciao, Jan ddb{2}> show panic ffs_valloc: dup alloc ddb{2}> trace db_enter() at db_enter+0x10 panic(81dda170) at panic+0x12a ffs_inode_alloc(fd8a1acb50f0,81a4,fd8c3f7ba120,8000229d3088) at ffs_inode_alloc+0x442 ufs_makeinode(81a4,fd8a8a498940,8000229d3380,8000229d33d0) at ufs_makeinode+0x7f ufs_create(8000229d3130) at ufs_create+0x3c VOP_CREATE(fd8a8a498940,8000229d3380,8000229d33d0,8000229d3190) at VOP_CREATE+0x4a vn_open(8000229d3350,602,1a4) at vn_open+0x182 doopenat(800022915500,ff9c,cc7a0280ad0,601,1b6,8000229d3550) at doopenat+0x1cd syscall(8000229d35c0) at syscall+0x389 Xsyscall() at Xsyscall+0x128 end of kernel end trace frame: 0x7f7c5520, count: -10 ddb{2}> ps PID TID PPIDUID S FLAGS WAIT COMMAND *56226 366608 70629 0 70x13cvs
Re: LibreSSL: extend the max. no. of SANs for avoid OOM error
ping On Tue, Mar 09, 2021 at 03:49:32PM +0100, Jan Klemkow wrote: > Hi, > > The verification of the https://ugos.ugm.ac.id certificate contains 2032 > subject alt names which leads to the following error in LibreSSL. > > # openssl s_client -connect ugos.ugm.ac.id:443 > ... > verify error:num=17:out of memory > ... > > The following diff sets the maximum number of SANs to the next higher > number to fix this issue. > > OK? > > bye, > Jan > > Index: lib/libcrypto/x509/x509_internal.h > === > RCS file: /cvs/src/lib/libcrypto/x509/x509_internal.h,v > retrieving revision 1.6 > diff -u -p -r1.6 x509_internal.h > --- lib/libcrypto/x509/x509_internal.h5 Jan 2021 16:45:59 - > 1.6 > +++ lib/libcrypto/x509/x509_internal.h9 Mar 2021 14:38:35 - > @@ -31,7 +31,7 @@ > * Limit the number of names and constraints we will check in a chain > * to avoid a hostile input DOS > */ > -#define X509_VERIFY_MAX_CHAIN_NAMES 512 > +#define X509_VERIFY_MAX_CHAIN_NAMES 4096 > #define X509_VERIFY_MAX_CHAIN_CONSTRAINTS512 > > /* >
Re: ixl(4): add ID for X710 10G SFP+
On Mon, Mar 15, 2021 at 01:35:28AM -0600, Theo de Raadt wrote: > My comments are about the "text name", which goes into every kernel > anyone compiles. > > It should be as short as possible. Sorry, I missed that point. > But the reason why 10G is incorrect is because surely the port can > accept 1G, or a variety of other SFPs... It is simply too exact, > and wasting kernel bytes. OK? Thanks, Jan Index: if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.73 diff -u -p -r1.73 if_ixl.c --- if_ixl.c26 Feb 2021 10:36:45 - 1.73 +++ if_ixl.c15 Mar 2021 07:42:48 - @@ -1611,6 +1611,7 @@ struct ixl_device { static const struct ixl_device ixl_devices[] = { { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP }, + { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2 }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_40G_BP }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_BP, }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_QSFP_1 }, Index: pcidevs === RCS file: /cvs/src/sys/dev/pci/pcidevs,v retrieving revision 1.1960 diff -u -p -r1.1960 pcidevs --- pcidevs 14 Mar 2021 01:09:29 - 1.1960 +++ pcidevs 15 Mar 2021 07:42:19 - @@ -3702,6 +3702,7 @@ product INTEL ICH8_IGP_AMT0x104a ICH8 I product INTEL ICH8_IGP_C 0x104b ICH8 IGP C product INTEL ICH8_IFE 0x104c ICH8 IFE product INTEL ICH8_IGP_M 0x104d ICH8 IGP M +product INTEL X710_10G_SFP_2 0x104e X710 SFP+ product INTEL PRO_100_VE_4 0x1050 PRO/100 VE product INTEL PRO_100_VE_5 0x1051 PRO/100 VE product INTEL PRO_100_VM_6 0x1052 PRO/100 VM Index: pcidevs.h === RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v retrieving revision 1.1954 diff -u -p -r1.1954 pcidevs.h --- pcidevs.h 14 Mar 2021 01:10:35 - 1.1954 +++ pcidevs.h 15 Mar 2021 07:42:21 - @@ -3707,6 +3707,7 @@ #definePCI_PRODUCT_INTEL_ICH8_IGP_C0x104b /* ICH8 IGP C */ #definePCI_PRODUCT_INTEL_ICH8_IFE 0x104c /* ICH8 IFE */ #definePCI_PRODUCT_INTEL_ICH8_IGP_M0x104d /* ICH8 IGP M */ 
+#definePCI_PRODUCT_INTEL_X710_10G_SFP_20x104e /* X710 SFP+ */ #definePCI_PRODUCT_INTEL_PRO_100_VE_4 0x1050 /* PRO/100 VE */ #definePCI_PRODUCT_INTEL_PRO_100_VE_5 0x1051 /* PRO/100 VE */ #definePCI_PRODUCT_INTEL_PRO_100_VM_6 0x1052 /* PRO/100 VM */ Index: pcidevs_data.h === RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v retrieving revision 1.1949 diff -u -p -r1.1949 pcidevs_data.h --- pcidevs_data.h 14 Mar 2021 01:10:35 - 1.1949 +++ pcidevs_data.h 15 Mar 2021 07:42:21 - @@ -12252,6 +12252,10 @@ static const struct pci_known_product pc "ICH8 IGP M", }, { + PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2, + "X710 SFP+", + }, + { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_PRO_100_VE_4, "PRO/100 VE", },
Re: ixl(4): add ID for X710 10G SFP+
On Sun, Mar 14, 2021 at 12:39:42PM -0600, Theo de Raadt wrote: > > +product INTEL X710_10G_SFP_2 0x104e X710 10G SFP+ > > You only need: > > X710 SFP+ > > Adding 10G is incorrect. OK? Thanks, Jan Index: if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.73 diff -u -p -r1.73 if_ixl.c --- if_ixl.c26 Feb 2021 10:36:45 - 1.73 +++ if_ixl.c15 Mar 2021 07:17:14 - @@ -1611,6 +1611,7 @@ struct ixl_device { static const struct ixl_device ixl_devices[] = { { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP }, + { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_SFP }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_40G_BP }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_BP, }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_QSFP_1 }, Index: pcidevs === RCS file: /cvs/src/sys/dev/pci/pcidevs,v retrieving revision 1.1960 diff -u -p -r1.1960 pcidevs --- pcidevs 14 Mar 2021 01:09:29 - 1.1960 +++ pcidevs 15 Mar 2021 07:15:22 - @@ -3702,6 +3702,7 @@ product INTEL ICH8_IGP_AMT0x104a ICH8 I product INTEL ICH8_IGP_C 0x104b ICH8 IGP C product INTEL ICH8_IFE 0x104c ICH8 IFE product INTEL ICH8_IGP_M 0x104d ICH8 IGP M +product INTEL X710_SFP 0x104e X710 SFP+ product INTEL PRO_100_VE_4 0x1050 PRO/100 VE product INTEL PRO_100_VE_5 0x1051 PRO/100 VE product INTEL PRO_100_VM_6 0x1052 PRO/100 VM Index: pcidevs.h === RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v retrieving revision 1.1954 diff -u -p -r1.1954 pcidevs.h --- pcidevs.h 14 Mar 2021 01:10:35 - 1.1954 +++ pcidevs.h 15 Mar 2021 07:15:24 - @@ -3707,6 +3707,7 @@ #definePCI_PRODUCT_INTEL_ICH8_IGP_C0x104b /* ICH8 IGP C */ #definePCI_PRODUCT_INTEL_ICH8_IFE 0x104c /* ICH8 IFE */ #definePCI_PRODUCT_INTEL_ICH8_IGP_M0x104d /* ICH8 IGP M */ +#definePCI_PRODUCT_INTEL_X710_SFP 0x104e /* X710 SFP+ */ #definePCI_PRODUCT_INTEL_PRO_100_VE_4 0x1050 /* PRO/100 VE */ #definePCI_PRODUCT_INTEL_PRO_100_VE_5 0x1051 /* PRO/100 VE */ #definePCI_PRODUCT_INTEL_PRO_100_VM_6 0x1052 /* PRO/100 VM */ Index: 
pcidevs_data.h === RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v retrieving revision 1.1949 diff -u -p -r1.1949 pcidevs_data.h --- pcidevs_data.h 14 Mar 2021 01:10:35 - 1.1949 +++ pcidevs_data.h 15 Mar 2021 07:15:24 - @@ -12252,6 +12252,10 @@ static const struct pci_known_product pc "ICH8 IGP M", }, { + PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_SFP, + "X710 SFP+", + }, + { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_PRO_100_VE_4, "PRO/100 VE", },
ixl(4): add ID for X710 10G SFP+
Hi, This diff attaches the Intel x710 10G SFP+ NIC in ixl(4). ixl2 at pci11 dev 0 function 2 "Intel X710 10G SFP+" rev 0x02: port 1, FW 8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:bc ixl3 at pci11 dev 0 function 3 "Intel X710 10G SFP+" rev 0x02: port 3, FW 8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:bd ixl2: flags=8802 mtu 1500 lladdr 3c:ec:ef:1f:c3:bc index 3 priority 0 llprio 3 media: Ethernet autoselect status: no carrier ixl3: flags=8802 mtu 1500 lladdr 3c:ec:ef:1f:c3:bd index 4 priority 0 llprio 3 media: Ethernet autoselect status: no carrier OK? bye, Jan Index: pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.73 diff -u -p -r1.73 if_ixl.c --- pci/if_ixl.c26 Feb 2021 10:36:45 - 1.73 +++ pci/if_ixl.c13 Mar 2021 23:56:13 - @@ -1611,6 +1611,7 @@ struct ixl_device { static const struct ixl_device ixl_devices[] = { { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP }, + { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2 }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_40G_BP }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_BP, }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_QSFP_1 }, Index: pci/pcidevs === RCS file: /cvs/src/sys/dev/pci/pcidevs,v retrieving revision 1.1960 diff -u -p -r1.1960 pcidevs --- pci/pcidevs 14 Mar 2021 01:09:29 - 1.1960 +++ pci/pcidevs 14 Mar 2021 11:33:27 - @@ -3702,6 +3702,7 @@ product INTEL ICH8_IGP_AMT0x104a ICH8 I product INTEL ICH8_IGP_C 0x104b ICH8 IGP C product INTEL ICH8_IFE 0x104c ICH8 IFE product INTEL ICH8_IGP_M 0x104d ICH8 IGP M +product INTEL X710_10G_SFP_2 0x104e X710 10G SFP+ product INTEL PRO_100_VE_4 0x1050 PRO/100 VE product INTEL PRO_100_VE_5 0x1051 PRO/100 VE product INTEL PRO_100_VM_6 0x1052 PRO/100 VM Index: pci/pcidevs.h === RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v retrieving revision 1.1954 diff -u -p -r1.1954 pcidevs.h --- pci/pcidevs.h 14 Mar 2021 01:10:35 - 1.1954 +++ pci/pcidevs.h 14 Mar 2021 
11:33:27 - @@ -3707,6 +3707,7 @@ #definePCI_PRODUCT_INTEL_ICH8_IGP_C0x104b /* ICH8 IGP C */ #definePCI_PRODUCT_INTEL_ICH8_IFE 0x104c /* ICH8 IFE */ #definePCI_PRODUCT_INTEL_ICH8_IGP_M0x104d /* ICH8 IGP M */ +#definePCI_PRODUCT_INTEL_X710_10G_SFP_20x104e /* X710 10G SFP+ */ #definePCI_PRODUCT_INTEL_PRO_100_VE_4 0x1050 /* PRO/100 VE */ #definePCI_PRODUCT_INTEL_PRO_100_VE_5 0x1051 /* PRO/100 VE */ #definePCI_PRODUCT_INTEL_PRO_100_VM_6 0x1052 /* PRO/100 VM */ Index: pci/pcidevs_data.h === RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v retrieving revision 1.1949 diff -u -p -r1.1949 pcidevs_data.h --- pci/pcidevs_data.h 14 Mar 2021 01:10:35 - 1.1949 +++ pci/pcidevs_data.h 14 Mar 2021 11:33:27 - @@ -12252,6 +12252,10 @@ static const struct pci_known_product pc "ICH8 IGP M", }, { + PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2, + "X710 10G SFP+", + }, + { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_PRO_100_VE_4, "PRO/100 VE", },
Re: add missing PCI ID for Intel NVMe
On Fri, Mar 12, 2021 at 11:56:00AM +0100, Mark Kettenis wrote: > I believe this is what ark.intel.com calls a "Intel SSD DC P4510 > Series" part. Is that correct? Yes, that is correct. On Fri, Mar 12, 2021 at 10:00:54PM +1100, Jonathan Gray wrote: > On Fri, Mar 12, 2021 at 11:30:04AM +0100, Jan Klemkow wrote: > So it is a 'SSD DC P4510' > > A driver downloaded from Intel has > ... > PCI\VEN_8086&DEV_0A54.DeviceDesc = "Intel(R) SSD DC > P4500/4600/4501/4601/4608/4510/4610/4511 Series" > ... > > perhaps just > product INTEL NVME_5 0x0a54 SSD DC You are right, that's a better name. Also the sticker on the disk just says "Intel SSD DC". OK? Thanks, Jan Index: pcidevs === RCS file: /cvs/src/sys/dev/pci/pcidevs,v retrieving revision 1.1959 diff -u -p -r1.1959 pcidevs --- pcidevs 27 Feb 2021 03:00:54 - 1.1959 +++ pcidevs 13 Mar 2021 20:22:04 - @@ -3465,6 +3465,7 @@ product INTEL CORE4G_M_ULT_GT30x0a26 HD product INTEL CORE4G_S_ULT_GT3 0x0a2a HD Graphics product INTEL CORE4G_R_ULT_GT3_1 0x0a2bHD Graphics product INTEL CORE4G_R_ULT_GT3_2 0x0a2eIris Graphics 5100 +product INTEL NVME_5 0x0a54 SSD DC product INTEL GMA3600_00x0be0 GMA 3600 product INTEL D2000_IGD0x0be1 Atom D2000/N2000 Video product INTEL GMA3600_20x0be2 GMA 3600 Index: pcidevs.h === RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v retrieving revision 1.1953 diff -u -p -r1.1953 pcidevs.h --- pcidevs.h 27 Feb 2021 03:01:25 - 1.1953 +++ pcidevs.h 13 Mar 2021 20:22:06 - @@ -3470,6 +3470,7 @@ #definePCI_PRODUCT_INTEL_CORE4G_S_ULT_GT3 0x0a2a /* HD Graphics */ #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_10x0a2b /* HD Graphics */ #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_20x0a2e /* Iris Graphics 5100 */ +#definePCI_PRODUCT_INTEL_NVME_50x0a54 /* SSD DC */ #definePCI_PRODUCT_INTEL_GMA3600_0 0x0be0 /* GMA 3600 */ #definePCI_PRODUCT_INTEL_D2000_IGD 0x0be1 /* Atom D2000/N2000 Video */ #definePCI_PRODUCT_INTEL_GMA3600_2 0x0be2 /* GMA 3600 */ Index: pcidevs_data.h === RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v retrieving 
revision 1.1948 diff -u -p -r1.1948 pcidevs_data.h --- pcidevs_data.h 27 Feb 2021 03:01:25 - 1.1948 +++ pcidevs_data.h 13 Mar 2021 20:22:06 - @@ -11304,6 +11304,10 @@ static const struct pci_known_product pc "Iris Graphics 5100", }, { + PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_NVME_5, + "SSD DC", + }, + { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_GMA3600_0, "GMA 3600", },
add missing PCI ID for Intel NVMe
Hi, This diff adds a missing PCI ID of an Intel NVMe disk. The disk works after my last fix [1]. OK? bye, Jan [1]: https://marc.info/?l=openbsd-tech&m=161418460303831 Index: pcidevs === RCS file: /cvs/src/sys/dev/pci/pcidevs,v retrieving revision 1.1959 diff -u -p -r1.1959 pcidevs --- pcidevs 27 Feb 2021 03:00:54 - 1.1959 +++ pcidevs 12 Mar 2021 10:16:44 - @@ -3465,6 +3465,7 @@ product INTEL CORE4G_M_ULT_GT30x0a26 HD product INTEL CORE4G_S_ULT_GT3 0x0a2a HD Graphics product INTEL CORE4G_R_ULT_GT3_1 0x0a2bHD Graphics product INTEL CORE4G_R_ULT_GT3_2 0x0a2eIris Graphics 5100 +product INTEL NVME_1 0x0a54 NVMe Datacenter SSD product INTEL GMA3600_00x0be0 GMA 3600 product INTEL D2000_IGD0x0be1 Atom D2000/N2000 Video product INTEL GMA3600_20x0be2 GMA 3600 Index: pcidevs.h === RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v retrieving revision 1.1953 diff -u -p -r1.1953 pcidevs.h --- pcidevs.h 27 Feb 2021 03:01:25 - 1.1953 +++ pcidevs.h 12 Mar 2021 10:16:46 - @@ -3470,6 +3470,7 @@ #definePCI_PRODUCT_INTEL_CORE4G_S_ULT_GT3 0x0a2a /* HD Graphics */ #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_10x0a2b /* HD Graphics */ #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_20x0a2e /* Iris Graphics 5100 */ +#definePCI_PRODUCT_INTEL_NVME_10x0a54 /* NVMe Datacenter SSD */ #definePCI_PRODUCT_INTEL_GMA3600_0 0x0be0 /* GMA 3600 */ #definePCI_PRODUCT_INTEL_D2000_IGD 0x0be1 /* Atom D2000/N2000 Video */ #definePCI_PRODUCT_INTEL_GMA3600_2 0x0be2 /* GMA 3600 */ Index: pcidevs_data.h === RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v retrieving revision 1.1948 diff -u -p -r1.1948 pcidevs_data.h --- pcidevs_data.h 27 Feb 2021 03:01:25 - 1.1948 +++ pcidevs_data.h 12 Mar 2021 10:16:46 - @@ -11304,6 +11304,10 @@ static const struct pci_known_product pc "Iris Graphics 5100", }, { + PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_NVME_1, + "NVMe Datacenter SSD", + }, + { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_GMA3600_0, "GMA 3600", },
LibreSSL: extend the max. no. of SANs for avoid OOM error
Hi, The verification of the https://ugos.ugm.ac.id certificate contains 2032 subject alt names which leads to the following error in LibreSSL. # openssl s_client -connect ugos.ugm.ac.id:443 ... verify error:num=17:out of memory ... The following diff sets the maximum number of SANs to the next higher number to fix this issue. OK? bye, Jan Index: lib/libcrypto/x509/x509_internal.h === RCS file: /cvs/src/lib/libcrypto/x509/x509_internal.h,v retrieving revision 1.6 diff -u -p -r1.6 x509_internal.h --- lib/libcrypto/x509/x509_internal.h 5 Jan 2021 16:45:59 - 1.6 +++ lib/libcrypto/x509/x509_internal.h 9 Mar 2021 14:38:35 - @@ -31,7 +31,7 @@ * Limit the number of names and constraints we will check in a chain * to avoid a hostile input DOS */ -#define X509_VERIFY_MAX_CHAIN_NAMES512 +#define X509_VERIFY_MAX_CHAIN_NAMES4096 #define X509_VERIFY_MAX_CHAIN_CONSTRAINTS 512 /*
Re: pcidump(8): add missing PCI classes
On Fri, Mar 05, 2021 at 09:22:28AM -0700, Theo de Raadt wrote: > Fix dump() to convert subclass == NULL into something else, or maybe the > fix should be in pci_subclass() itself. My initial mistake was to use zero in an empty list. This leads to one element in the list, which causes wrong list handling in the followup code path. So, the following diff removes the zero from the list. It also adds a check for ps->name being NULL, to prevent dump() from printing (null), and removes a useless blank line while here. OK? Thanks, Jan Index: pcidump.c === RCS file: /cvs/src/usr.sbin/pcidump/pcidump.c,v retrieving revision 1.62 diff -u -p -r1.62 pcidump.c --- pcidump.c 5 Mar 2021 12:57:20 - 1.62 +++ pcidump.c 5 Mar 2021 17:05:40 - @@ -1296,8 +1296,8 @@ static const struct pci_subclass pci_sub { PCI_SUBCLASS_DASP_MISC, "Miscellaneous" }, }; -static const struct pci_subclass pci_subclass_accelerator[] = {0}; -static const struct pci_subclass pci_subclass_instrumentation[] = {0}; +static const struct pci_subclass pci_subclass_accelerator[] = {}; +static const struct pci_subclass pci_subclass_instrumentation[] = {}; #define CLASS(_c, _n, _s) { \ .class = _c, \ @@ -1389,7 +1389,6 @@ pci_class_name(pci_class_t class) return (pc->name); } - static const char * pci_subclass_name(pci_class_t class, pci_subclass_t subclass) { @@ -1401,7 +1400,7 @@ pci_subclass_name(pci_class_t class, pci return ("(unknown)"); ps = pci_subclass(pc, subclass); - if (ps == NULL) + if (ps == NULL || ps->name == NULL) return ("(unknown)"); return (ps->name);
Re: pcidump(8): add missing PCI classes
On Fri, Mar 05, 2021 at 04:13:53PM +0100, Mark Kettenis wrote: > > Date: Fri, 5 Mar 2021 12:05:38 +0100 > > From: Jan Klemkow > > Content-Type: text/plain; charset=us-ascii > > Content-Disposition: inline > > > > Hi, > > > > this diff adds the missing PCI classes Accelerator and Instrumentation. > > Thus, we can replace a few unknown in its output: > > > > - 0x0008: Class: 13 (unknown), Subclass: 00 (unknown), > > + 0x0008: Class: 13 Instrumentation, Subclass: 00 (null), > > Is this "(null)" the result of printing a null pointer? That would be > not so good. What do you suggest to use instead? Empty String, or "unknown"? It is vendor specific. Thanks, Jan
pcidump(8): add missing PCI classes
Hi, this diff adds the missing PCI classes Accelerator and Instrumentation. Thus, we can replace a few "(unknown)" entries in its output: - 0x0008: Class: 13 (unknown), Subclass: 00 (unknown), + 0x0008: Class: 13 Instrumentation, Subclass: 00 (null), Both classes have vendor-specific APIs, so there are no predefined subclasses. OK? bye, Jan Index: pcidump.c === RCS file: /cvs/src/usr.sbin/pcidump/pcidump.c,v retrieving revision 1.61 diff -u -p -r1.61 pcidump.c --- pcidump.c 17 Jan 2021 11:54:15 - 1.61 +++ pcidump.c 5 Mar 2021 10:57:27 - @@ -1296,6 +1296,9 @@ static const struct pci_subclass pci_sub { PCI_SUBCLASS_DASP_MISC, "Miscellaneous" }, }; +static const struct pci_subclass pci_subclass_accelerator[] = {0}; +static const struct pci_subclass pci_subclass_instrumentation[] = {0}; + #define CLASS(_c, _n, _s) { \ .class = _c, \ .name = _n, \ @@ -1338,6 +1341,10 @@ static const struct pci_class pci_classe pci_subclass_crypto), CLASS(PCI_CLASS_DASP, "DASP", pci_subclass_dasp), + CLASS(PCI_CLASS_ACCELERATOR,"Accelerator", + pci_subclass_accelerator), + CLASS(PCI_CLASS_INSTRUMENTATION, "Instrumentation", + pci_subclass_instrumentation), }; static const struct pci_class *
ixl(4): add missing pci dev id for X710 10GBase-T
Hi, The diff below adds a missing PCI device ID for an X710 10GBase NIC into the ixl(4) driver. The interfaces attach and run properly with this diff. ixl0 at pci11 dev 0 function 0 "Intel X710 10GBaseT" rev 0x02: port 0, FW 8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:ba ixl1 at pci11 dev 0 function 1 "Intel X710 10GBaseT" rev 0x02: port 2, FW 8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:bb # ifconfig ixl ixl0: flags=8843 mtu 1500 lladdr 3c:ec:ef:1f:c3:ba index 1 priority 0 llprio 3 media: Ethernet autoselect (1000baseT full-duplex) status: active inet 192.168.123.1 netmask 0xff00 broadcast 192.168.123.255 ixl1: flags=8843 mtu 1500 lladdr 3c:ec:ef:1f:c3:bb index 2 priority 0 llprio 3 media: Ethernet autoselect (1000baseT full-duplex) status: active inet 192.168.124.1 netmask 0xff00 broadcast 192.168.124.255 OK? bye, Jan Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.72 diff -u -p -r1.72 if_ixl.c --- dev/pci/if_ixl.c25 Jan 2021 11:11:22 - 1.72 +++ dev/pci/if_ixl.c26 Feb 2021 09:51:56 - @@ -1622,6 +1622,7 @@ static const struct ixl_device ixl_devic { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_T4_10G }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XXV710_25G_BP }, { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XXV710_25G_SFP28, }, + { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_T, }, { &ixl_722, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X722_10G_KX }, { &ixl_722, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X722_10G_QSFP }, { &ixl_722, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X722_10G_SFP_1 }, Index: dev/pci/pcidevs === RCS file: /cvs/src/sys/dev/pci/pcidevs,v retrieving revision 1.1956 diff -u -p -r1.1956 pcidevs --- dev/pci/pcidevs 22 Feb 2021 01:17:23 - 1.1956 +++ dev/pci/pcidevs 26 Feb 2021 09:49:01 - @@ -3962,6 +3962,7 @@ product INTEL I219_V140x15fa I219-V product INTEL I219_LM130x15fb I219-LM product INTEL I219_V13 0x15fc I219-V product INTEL I225_BLANK_NVM 0x15fd I225 +product INTEL 
X710_10G_T 0x15ff X710 10GBaseT product INTEL CORE5G_H_PCIE_X160x1601 Core 5G PCIE product INTEL CORE5G_M_GT1_1 0x1602 HD Graphics product INTEL CORE5G_THERM 0x1603 Core 5G Thermal Index: dev/pci/pcidevs.h === RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v retrieving revision 1.1949 diff -u -p -r1.1949 pcidevs.h --- dev/pci/pcidevs.h 22 Feb 2021 01:18:01 - 1.1949 +++ dev/pci/pcidevs.h 26 Feb 2021 09:49:05 - @@ -3967,6 +3967,7 @@ #definePCI_PRODUCT_INTEL_I219_LM13 0x15fb /* I219-LM */ #definePCI_PRODUCT_INTEL_I219_V13 0x15fc /* I219-V */ #definePCI_PRODUCT_INTEL_I225_BLANK_NVM0x15fd /* I225 */ +#definePCI_PRODUCT_INTEL_X710_10G_T0x15ff /* X710 10GBaseT */ #definePCI_PRODUCT_INTEL_CORE5G_H_PCIE_X16 0x1601 /* Core 5G PCIE */ #definePCI_PRODUCT_INTEL_CORE5G_M_GT1_10x1602 /* HD Graphics */ #definePCI_PRODUCT_INTEL_CORE5G_THERM 0x1603 /* Core 5G Thermal */ Index: dev/pci/pcidevs_data.h === RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v retrieving revision 1.1944 diff -u -p -r1.1944 pcidevs_data.h --- dev/pci/pcidevs_data.h 22 Feb 2021 01:18:01 - 1.1944 +++ dev/pci/pcidevs_data.h 26 Feb 2021 09:49:05 - @@ -13292,6 +13292,10 @@ static const struct pci_known_product pc "I225", }, { + PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_T, + "X710 10GBaseT", + }, + { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_CORE5G_H_PCIE_X16, "Core 5G PCIE", },
Re: LibreSSL legacy verifier regression
On Wed, Feb 24, 2021 at 09:21:56PM +0100, Theo Buehler wrote: > On Wed, Feb 24, 2021 at 09:00:05PM +0100, Theo Buehler wrote: > > On Wed, Feb 24, 2021 at 06:47:00AM +0100, Jan Klemkow wrote: > > > another co-worker of mine has found an other regress in the LibreSSL > > > legacy verifier. I took his diff and made a test for our regression > > > framework. > > > > > > The legacy verifier seems not to check the certificate if no root CA was > > > given. The following test creates an expired certificate and tries to > > > verify it. In one case it found the expected error in another, it does > > > not. > > > > Thanks for the report and the test case, that's very helpful. The diff > > at the end addresses this. > > > > The verifier does not find the expected error because it now bails out > > earlier. This is a consequence of a refactoring of X509_verify_cert() > > (x509_vfy.c r1.75) that was done to integrate the new verifier. > > > > https://cvsweb.openbsd.org/cgi-bin/cvsweb/src/lib/libcrypto/x509/x509_vfy.c.diff?r1=1.74&r2=1.75 > > > > What happens is that x509_legacy_verify_build_chain() returns ok == 0 in > > your test case. The safety net at the end of x509_verify_cert_legacy() > > sets ctx->error to X509_V_ERR_UNSPECIFIED (so the unchecked call to > > X509_verify_cert() in your regress test actually indicates verification > > failure). > > > > > > The diff below restores the previous behavior and fixes a bug. > > > > Prior to the the refactoring, each 'goto end' in the code that is now in > > x509_legacy_verify_build_chain() would stop validation, while in other > > cases validation would have carried on. So indicate this via the return > > value and return ok via a pointer. > > > > The bug is that the return value check of x509_legacy_verify_build_chain() > > should have been if (ok <= 0), not if (!ok). > > > > Regarding your regress diff: I don't think we want to land it as it is. Ok. 
> > The verifier lives in libcrypto/x509, so the regress test belongs in > > there. You are right, its the better place. At least I want to send you a bug report with concrete code to test. > > We really need to come up with an extensible design that can check a > > number of such cases (and ideally includes the bulk of your openssl/x509 > > regress tests). I don't want to add a directory for every bug in the > > verifier or legacy verifier. As jsing already mentioned, I expect that > > we want to commit the test certs so we don't need perl modules from > > ports to run the regress. Then we want to add generating scripts and a > > README that gives instructions on how to regenerate the certs if needed. > > > > I would like to have one C program that runs all tests in a loop (or > > perhaps one C program per family of regressions). It would be nice if > > this C program could also be compiled against OpenSSL 1.1.1 so we can > > easily check for differences of behavior (see x509/bettertls/Makefile > > for an example that does this). For your test program this just means: > > don't access csc->blah, but use X509_STORE_CTX_get_blah(csc) instead. > > > > Why does the test set TRUSTED_FIRST? I just forget to remove the this line, from the original version. > > The code also needs a bit of cleaning. There are a number of unchecked > > return values, for example strdup and sk_*_push, and csc is leaked > > after X509_verify_cert(). > > > > It would also be nice to run this test against the new verifier. The test passes with the new verifier in current, but not in 6.8. > Missed an obvious simplification. The diff looks fine to me and it fixes our regressions. I would give you an OK jan, fwiw. 
Thanks, Jan > Index: x509/x509_vfy.c > === > RCS file: /cvs/src/lib/libcrypto/x509/x509_vfy.c,v > retrieving revision 1.85 > diff -u -p -r1.85 x509_vfy.c > --- x509/x509_vfy.c 11 Feb 2021 04:56:43 - 1.85 > +++ x509/x509_vfy.c 24 Feb 2021 20:19:34 - > @@ -240,12 +240,13 @@ x509_vfy_check_id(X509_STORE_CTX *ctx) { > * Oooh.. > */ > static int > -X509_verify_cert_legacy_build_chain(X509_STORE_CTX *ctx, int *bad) > +X509_verify_cert_legacy_build_chain(X509_STORE_CTX *ctx, int *bad, int > *out_ok) > { > X509 *x, *xtmp, *xtmp2, *chain_ss = NULL; > int bad_chain = 0; > X509_VERIFY_PARAM *param = ctx->param; > - int depth, i, ok = 0; > + int ok = 0, ret = 0; > + int depth, i; > int
fix nvme(4): NULL deref. and empty device attachments
Hi, While attaching the following disks, the nvme driver runs into a NULL dereference in nvme_scsi_capacity16() and nvme_scsi_capacity(). nvme0 at pci1 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: msix, NVMe 1.2 nvme0: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ0413002P4P0DGN scsibus1 at nvme0: 129 targets, initiator 0 sd0 at scsibus1 targ 1 lun 0: sd0: 3815447MB, 512 bytes/sector, 7814037168 sectors sd1 at scsibus1 targ 2 lun 0: uvm_fault(0x821d00e8, 0x0, 0, 1) -> e kernel: page fault trap, code=0 Stopped at nvme_scsi_capacity16+0x39: movq0(%rax),%rcx ddb{0}> "ns" in both functions will be NULL, if "identify" is not allocated in nvme_scsi_probe(). Thus, it's better to just not attach these empty disks/LUNs. nvme0 at pci1 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: msix, NVMe 1.2 nvme0: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ0413002P4P0DGN scsibus1 at nvme0: 129 targets, initiator 0 sd0 at scsibus1 targ 1 lun 0: sd0: 3815447MB, 512 bytes/sector, 7814037168 sectors ppb1 at pci0 dev 3 function 2 "AMD 17h PCIE" rev 0x00: msi pci2 at ppb1 bus 98 nvme1 at pci2 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: msix, NVMe 1.2 nvme1: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ041500C34P0DGN scsibus2 at nvme1: 129 targets, initiator 0 sd1 at scsibus2 targ 1 lun 0: sd1: 3815447MB, 512 bytes/sector, 7814037168 sectors ppb2 at pci0 dev 3 function 3 "AMD 17h PCIE" rev 0x00: msi pci3 at ppb2 bus 99 nvme2 at pci3 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: msix, NVMe 1.2 nvme2: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ041402Z64P0DGN scsibus3 at nvme2: 129 targets, initiator 0 sd2 at scsibus3 targ 1 lun 0: sd2: 3815447MB, 512 bytes/sector, 7814037168 sectors ppb3 at pci0 dev 3 function 4 "AMD 17h PCIE" rev 0x00: msi pci4 at ppb3 bus 100 nvme3 at pci4 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: msix, NVMe 1.2 nvme3: INTEL SSDPE2KX040T8, firmware 
VDV10170, serial PHLJ041403134P0DGN scsibus4 at nvme3: 129 targets, initiator 0 sd3 at scsibus4 targ 1 lun 0: sd3: 3815447MB, 512 bytes/sector, 7814037168 sectors The following diff signals an error to the upper probing function in the SCSI layer to prevent further function calls into nvme(4), which would just lead to the error described above and hundreds of unconfigured devices. OK? bye, Jan Index: dev/ic/nvme.c === RCS file: /cvs//src/sys/dev/ic/nvme.c,v retrieving revision 1.90 diff -u -p -r1.90 nvme.c --- dev/ic/nvme.c 9 Feb 2021 01:50:10 - 1.90 +++ dev/ic/nvme.c 24 Feb 2021 16:01:48 - @@ -463,11 +463,16 @@ nvme_scsi_probe(struct scsi_link *link) scsi_io_put(&sc->sc_iopool, ccb); identify = NVME_DMA_KVA(mem); - if (rv == 0 && lemtoh64(&identify->nsze) > 0) { - /* Commit namespace if it has a size greater than zero. */ - identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK); - memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify)); - sc->sc_namespaces[link->target].ident = identify; + if (rv == 0) { + if (lemtoh64(&identify->nsze) > 0) { + /* Commit namespace if it has a size greater than zero. */ + identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK); + memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify)); + sc->sc_namespaces[link->target].ident = identify; + } else { + /* Don't attach a namespace if its size is zero. */ + rv = ENXIO; + } } nvme_dmamem_free(sc, mem);
LibreSSL legacy verifier regression
Hi, another co-worker of mine has found another regression in the LibreSSL legacy verifier. I took his diff and made a test for our regression framework. The legacy verifier seems not to check the certificate if no root CA was given. The following test creates an expired certificate and tries to verify it. In one case it finds the expected error; in the other, it does not. OK? bye, Jan Index: lib/libcrypto/Makefile === RCS file: /cvs/src/regress/lib/libcrypto/Makefile,v retrieving revision 1.41 diff -u -p -r1.41 Makefile --- lib/libcrypto/Makefile 26 Dec 2020 00:48:56 - 1.41 +++ lib/libcrypto/Makefile 24 Feb 2021 05:29:51 - @@ -23,6 +23,7 @@ SUBDIR += ecdsa SUBDIR += engine SUBDIR += evp SUBDIR += exp +SUBDIR += expcert SUBDIR += free SUBDIR += gcm128 SUBDIR += gost Index: lib/libcrypto/expcert/Makefile === RCS file: lib/libcrypto/expcert/Makefile diff -N lib/libcrypto/expcert/Makefile --- /dev/null 1 Jan 1970 00:00:00 - +++ lib/libcrypto/expcert/Makefile 24 Feb 2021 05:39:38 - @@ -0,0 +1,29 @@ +# $OpenBSD$ + +LDFLAGS += -lcrypto + +PROG = expcrt + +PKG_REQUIRE != pkg_info -e 'p5-IO-Socket-SSL-*' +.if empty (PKG_REQUIRE) +regress: + @echo "missing package p5-IO-Socket-SSL" + @echo SKIPPED +.endif + +REGRESS_TARGETS = test-chain-with-root-CA +REGRESS_TARGETS += test-chain-without-root-CA +REGRESS_SETUP_ONCE = create-certs + +REGRESS_EXPECTED_FAILURES = test-chain-without-root-CA + +create-certs: create-certs.pl ${PROG} + perl ${.CURDIR}/create-certs.pl + +test-chain-with-root-CA: + ./expcrt -e 10 -r + +test-chain-without-root-CA: + ./expcrt -e 10 + +.include Index: lib/libcrypto/expcert/create-certs.pl === RCS file: lib/libcrypto/expcert/create-certs.pl diff -N lib/libcrypto/expcert/create-certs.pl --- /dev/null 1 Jan 1970 00:00:00 - +++ lib/libcrypto/expcert/create-certs.pl 24 Feb 2021 05:27:46 - @@ -0,0 +1,46 @@ +#!/usr/bin/perl + +# Copyright (c) 2021 Anton Borowka +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without 
fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +use strict; +use warnings; + +use IO::Socket::SSL::Utils; + +my %certs; + +@{$certs{root}}{qw/cert key/} = CERT_create( +CA => 1, +not_after => time() + 31536, +subject => { commonName => 'Root CA' }, +); + +@{$certs{intermediate}}{qw/cert key/} = CERT_create( +CA => 1, +issuer => [@{$certs{root}}{qw/cert key/}], +not_after => time() + 31536, +subject => { commonName => 'Intermediate CA' }, +); + +@{$certs{expired}}{qw/cert key/} = CERT_create( +issuer => [@{$certs{intermediate}}{qw/cert key/}], +not_before => time() - 7200, +not_after => time() - 3600, +subject => { commonName => 'Expired' }, +); + +for (sort keys %certs) { +PEM_cert2file($certs{$_}{cert}, "$_.crt"); +} Index: lib/libcrypto/expcert/expcrt.c === RCS file: lib/libcrypto/expcert/expcrt.c diff -N lib/libcrypto/expcert/expcrt.c --- /dev/null 1 Jan 1970 00:00:00 - +++ lib/libcrypto/expcert/expcrt.c 24 Feb 2021 05:27:46 - @@ -0,0 +1,218 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2021 Jan Klemkow + * Copyright (c) 2021 Anton Borowka + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#includ
Re: LibreSSL regressions
On Tue, Feb 16, 2021 at 04:36:59AM +1100, Joel Sing wrote: > On 21-02-15 14:49:46, Jan Klemkow wrote: > > On Sat, Feb 13, 2021 at 03:53:48PM +0100, Theo Buehler wrote: > > > On Sat, Feb 13, 2021 at 11:58:04AM +0100, Jan Klemkow wrote: > > > > A coworker of mine has made tests with LibreSSL [1] and found some > > > > regressions. I took his test descriptions and created the following > > > > automated regression test. In the repository he described his findings > > > > in detail. I kept the numbers of the files and subtests in the target > > > > names for now. So, its easier to match it with his files. > > > > > > > > I don't know how to handle the result of "test-01-ssl". Thats why its > > > > just a comment. Someone may have an idea to handle this properly. > > > > > > > > Any comments, wishes or OK's? > > > > > > > > [1]: https://github.com/noxxi/libressl-tests > > > > > > First of all thanks for the effort! > > > > > > The perl script and probably also the Makefile should have a license. > > > > > > Please add a check that tests whether the required perl modules are > > > installed (p5-IO-Socket-SSL and p5-Net-SSLeay) and otherwise prints > > > SKIPPED and their names, so I can install them if they're not present. > > > I never remember their exact capitalization and hyphenation... > > > > > > Various comments inline, and a patch for openssl(1) at the end that may > > > simplify some things. > > > > This is an updated version of the test including comments and wishes > > from tb@ and bluhm@. > > > > OK? > > This currently drives openssl(1) for tests, which means that it is > testing openssl(1), libssl and libcrypto, when what you're really > wanting to test is libcrypto's verifier. While this works, the > problem is that a change or breakage in libssl or openssl(1) results > in a regress failure for libcrypto. 
If this is to land in its > current form it really should be in regress/usr.bin/openssl - > alternatively, it could be reworked to explicitly test libcrypto's > APIs and remain here. > > Some additional comments inline. So, the following diff should hit all needs. OK? Thanks, Jan Index: usr.bin/openssl/Makefile === RCS file: /cvs/src/regress/usr.bin/openssl/Makefile,v retrieving revision 1.6 diff -u -p -r1.6 Makefile --- usr.bin/openssl/Makefile19 Mar 2018 03:41:40 - 1.6 +++ usr.bin/openssl/Makefile15 Feb 2021 20:37:11 - @@ -1,6 +1,6 @@ # $OpenBSD: Makefile,v 1.6 2018/03/19 03:41:40 beck Exp $ -SUBDIR= options +SUBDIR= options x509 CLEANFILES+= testdsa.key testdsa.pem rsakey.pem rsacert.pem dsa512.pem CLEANFILES+= appstest_dir Index: usr.bin/openssl/x509/Makefile === RCS file: usr.bin/openssl/x509/Makefile diff -N usr.bin/openssl/x509/Makefile --- /dev/null 1 Jan 1970 00:00:00 - +++ usr.bin/openssl/x509/Makefile 16 Feb 2021 12:06:10 - @@ -0,0 +1,129 @@ +# $OpenBSD$ + +# Copyright (c) 2021 Jan Klemkow +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +# This regression test is based on manual test descriptions from: +# https://github.com/noxxi/libressl-tests + +# The following port must be installed for the regression tests: +# p5-IO-Socket-SSL perl interface to SSL sockets + +PERL = perl +OPENSSL ?= openssl + +PKG_REQUIRE != pkg_info -e 'p5-IO-Socket-SSL-*' +.if empty (PKG_REQUIRE) +regress: + @echo "missing package p5-IO-Socket-SSL" + @echo SKIPPED +.endif + +REGRESS_TARGETS += test-inlabel-wildcard-cert-no-CA-client +REGRESS_TARGETS += test-inlabel-wildcard-cert-CA-client +REGRESS_TARGETS += test-common-wildcard-cert-no-CA-client +REGRESS_TARGETS += test-common-wildcard-cert-CA-client +REGRESS_TARGETS += test-verify-unusual-wildcard-cert +REGRESS_TARGETS += test-openssl-verify-common
Re: LibreSSL regressions
On Tue, Feb 16, 2021 at 04:36:59AM +1100, Joel Sing wrote: > On 21-02-15 14:49:46, Jan Klemkow wrote: > > +create-libressl-test-certs: create-libressl-test-certs.pl > > + ${PERL} ${.CURDIR}/$@.pl > > We can see how this goes, however we may end up wanting to generate > the certificates and commit them rather than regenerating on each > run. The other advantage is that p5-IO-Socket-SSL would only be > needed to regenerate the certificates and not actually run the > tests. What should I do? Just commit the generated files and remove the Perl script? > > Index: regress/lib/libcrypto/validate/create-libressl-test-certs.pl > > === > > RCS file: regress/lib/libcrypto/validate/create-libressl-test-certs.pl > > diff -N regress/lib/libcrypto/validate/create-libressl-test-certs.pl > > --- /dev/null 1 Jan 1970 00:00:00 - > > +++ regress/lib/libcrypto/validate/create-libressl-test-certs.pl15 Feb > > 2021 12:54:58 - > > @@ -0,0 +1,111 @@ > > +#!/usr/bin/perl > > + > > +# Copyright (c) 2021 Steffen Ullrich > > +# Public Domain > > + > > +use strict; > > +use warnings; > > +use IO::Socket::SSL::Utils; > > + > > +# primitive CA - ROOT > > +my @ca = cert( > > +CA => 1, > > +subject => { CN => 'ROOT' } > > +); > > +out('caR.pem', pem(crt => $ca[0])); > > +out('caR.key', pem(key => $ca[1])); > > + > > +# server certificate where SAN contains in-label wildcards which are > > allowed by > > +# RFC 6125 > > It is worth noting that per the RFC, a client MAY allow in-label > wildcards (this is not a MUST or even a SHOULD). Additionally, > various software does not allow or support this (for example, libtls > and hence ftp(1)). What should I do here? Thanks, Jan
Re: LibreSSL regressions
On Sat, Feb 13, 2021 at 03:53:48PM +0100, Theo Buehler wrote: > On Sat, Feb 13, 2021 at 11:58:04AM +0100, Jan Klemkow wrote: > > A coworker of mine has made tests with LibreSSL [1] and found some > > regressions. I took his test descriptions and created the following > > automated regression test. In the repository he described his findings > > in detail. I kept the numbers of the files and subtests in the target > > names for now. So, its easier to match it with his files. > > > > I don't know how to handle the result of "test-01-ssl". Thats why its > > just a comment. Someone may have an idea to handle this properly. > > > > Any comments, wishes or OK's? > > > > [1]: https://github.com/noxxi/libressl-tests > > First of all thanks for the effort! > > The perl script and probably also the Makefile should have a license. > > Please add a check that tests whether the required perl modules are > installed (p5-IO-Socket-SSL and p5-Net-SSLeay) and otherwise prints > SKIPPED and their names, so I can install them if they're not present. > I never remember their exact capitalization and hyphenation... > > Various comments inline, and a patch for openssl(1) at the end that may > simplify some things. This is an updated version of the test including comments and wishes from tb@ and bluhm@. OK? Thanks, Jan Index: regress/lib/libcrypto/validate/Makefile === RCS file: regress/lib/libcrypto/validate/Makefile diff -N regress/lib/libcrypto/validate/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ regress/lib/libcrypto/validate/Makefile 15 Feb 2021 13:38:22 - @@ -0,0 +1,133 @@ +# $OpenBSD$ + +# Copyright (c) 2021 Jan Klemkow +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. 
+# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# This regression test is based on manual test descriptions from: +# https://github.com/noxxi/libressl-tests + +# The following port must be installed for the regression tests: +# p5-IO-Socket-SSL perl interface to SSL sockets + +PERL = perl +OPENSSL ?= openssl + +PERL_REQUIRE !=perl -Mstrict -Mwarnings -e ' \ +eval { require IO::Socket::SSL } or print $@; \ +' +.if ! empty (PERL_REQUIRE) +regress: + @echo "${PERL_REQUIRE}" + @echo install these perl packages for additional tests + @echo SKIPPED +.endif + +REGRESS_TARGETS += test-unusual-wildcard-cert-no-CA-client +REGRESS_TARGETS += test-unusual-wildcard-cert-CA-client +REGRESS_TARGETS += test-common-wildcard-cert-no-CA-client +REGRESS_TARGETS += test-common wildcard-cert-CA-client +REGRESS_TARGETS += test-verify-unusual-wildcard-cert +REGRESS_TARGETS += test-openssl-verify-common-wildcard-cert +REGRESS_TARGETS += test-chain-certificates-s_server +REGRESS_TARGETS += test-alternative-chain +REGRESS_CLEANUP = cleanup-ssl +REGRESS_SETUP_ONCE = create-libressl-test-certs + +REGRESS_EXPECTED_FAILURES += test-unusual-wildcard-cert-no-CA-client +REGRESS_EXPECTED_FAILURES += test-common-wildcard-cert-no-CA-client +REGRESS_EXPECTED_FAILURES += test-common wildcard-cert-CA-client +REGRESS_EXPECTED_FAILURES += test-verify-unusual-wildcard-cert +REGRESS_EXPECTED_FAILURES += test-alternative-chain + +create-libressl-test-certs: create-libressl-test-certs.pl + ${PERL} ${.CURDIR}/$@.pl + +cleanup-ssl: + rm *.pem *.key + 
+test-unusual-wildcard-cert-no-CA-client: + # unusual wildcard cert, no CA given to client + # start client + ${OPENSSL} s_server -cert server-unusual-wildcard.pem \ + -key server-unusual-wildcard.pem & \ + timeout=$$(($$(date +%s) + 5)); \ + while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \ + do test $$(date +%s) -lt $$timeout || exit 1; done + # start client + echo "Q" | ${OPENSSL} s_client -verify_return_error \ + | grep "Verify return code: 21" + +test-unusual-wildcard-cert-CA-client: + # unusual wildcard cert, CA given to client + # start server + ${OPENSSL} s_server -cert server-unusual-wildcard.pem \ + -key ser
LibreSSL regressions
Hi, A coworker of mine has made tests with LibreSSL [1] and found some regressions. I took his test descriptions and created the following automated regression test. In the repository he described his findings in detail. I kept the numbers of the files and subtests in the target names for now. So, its easier to match it with his files. I don't know how to handle the result of "test-01-ssl". Thats why its just a comment. Someone may have an idea to handle this properly. Any comments, wishes or OK's? bye, Jan [1]: https://github.com/noxxi/libressl-tests Index: regress/lib/libssl/Makefile === RCS file: /cvs/src/regress/lib/libssl/Makefile,v retrieving revision 1.42 diff -u -p -r1.42 Makefile --- regress/lib/libssl/Makefile 14 Oct 2020 15:53:22 - 1.42 +++ regress/lib/libssl/Makefile 12 Feb 2021 19:42:56 - @@ -16,6 +16,7 @@ SUBDIR += tlsext SUBDIR += tlslegacy SUBDIR += key_schedule SUBDIR += unit +SUBDIR += validate # Things that take a long time should go below here. SUBDIR += tlsfuzzer Index: regress/lib/libssl/validate/Makefile === RCS file: regress/lib/libssl/validate/Makefile diff -N regress/lib/libssl/validate/Makefile --- /dev/null 1 Jan 1970 00:00:00 - +++ regress/lib/libssl/validate/Makefile13 Feb 2021 10:50:30 - @@ -0,0 +1,104 @@ +# Tests from: https://github.com/noxxi/libressl-tests + +PERL=perl + +REGRESS_TARGETS = test-00-01-ssl +REGRESS_TARGETS += test-00-02-ssl +REGRESS_TARGETS += test-00-03-ssl +REGRESS_TARGETS += test-00-04-ssl +REGRESS_TARGETS += test-00-05-ssl +REGRESS_TARGETS += test-00-06-ssl +REGRESS_TARGETS += test-01-ssl +REGRESS_TARGETS += test-02-ssl +REGRESS_ROOT_TARGETS = ${REGRESS_TARGETS} +REGRESS_CLEANUP = cleanup-ssl +REGRESS_SETUP =create-libressl-test-certs + +create-libressl-test-certs: create-libressl-test-certs.pl + ${PERL} ${.CURDIR}/$@.pl + +cleanup-ssl: + pkill openssl || true + rm *.pem *.key + +test-00-01-ssl: + # unusual wildcard cert, no CA given to client + # cleanup + pkill openssl || true + sleep 2 + # start client + 
${KTRACE} openssl s_server -cert server-unusual-wildcard.pem \ + -key server-unusual-wildcard.pem -www & \ + timeout=$$(($$(date +%s) + 5)); \ + while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \ + do test $$(date +%s) -lt $$timeout || exit 1; done + # start client + echo "data" | openssl s_client -verify_return_error -connect 127.0.0.1:4433 \ + | grep "Verify return code: 21" + +test-00-02-ssl: + # unusual wildcard cert, CA given to client + # cleanup + pkill openssl || true + sleep 2 + # start server + ${KTRACE} openssl s_server -cert server-unusual-wildcard.pem \ + -key server-unusual-wildcard.pem -www & \ + timeout=$$(($$(date +%s) + 5)); \ + while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \ + do test $$(date +%s) -lt $$timeout || exit 1; done + # start client + echo "data" | openssl s_client -connect 127.0.0.1:4433 -CAfile caR.pem \ + | grep "Verify return code: 0" + +test-00-03-ssl: + # common wildcard cert, no CA given to client + # cleanup + pkill openssl || true + sleep 2 + # start server + ${KTRACE} openssl s_server -cert server-common-wildcard.pem \ + -key server-common-wildcard.pem -www & \ + timeout=$$(($$(date +%s) + 5)); \ + while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \ + do test $$(date +%s) -lt $$timeout || exit 1; done + # start client + echo "data" | openssl s_client -connect 127.0.0.1:4433 \ + | grep "Verify return code: 21" + +test-00-04-ssl: + # common wildcard cert, CA given to client + # cleanup + pkill openssl || true + sleep 2 + # start server + ${KTRACE} openssl s_server -cert server-unusual-wildcard.pem \ + -key server-unusual-wildcard.pem -www & \ + timeout=$$(($$(date +%s) + 5)); \ + while fstat -p $$! | ! 
grep -q 'tcp .* \*:4433$$'; \ + do test $$(date +%s) -lt $$timeout || exit 1; done + # start client + echo "data" | openssl s_client -connect 127.0.0.1:4433 -CAfile caR.pem \ + | grep "Verify return code: 21" + +test-00-05-ssl: + # openssl verify, unusual wildcard cert + openssl verify -CAfile caR.pem server-unusual-wildcard.pem \ + | grep "server-unusual-wildcard.pem: OK" + +test-00-06-ssl: + # openssl verify, common wildcard cert + openssl verify -CAfile caR.pem server-common-wildcard.pem \ + | grep "server-common-wildcard.pem: OK" + +test-01-ssl: + # Not all chain certificates are sent in s_server + # o
Re: diff: tcp ack improvement
On Mon, Feb 08, 2021 at 03:42:54PM +0100, Alexander Bluhm wrote: > On Wed, Feb 03, 2021 at 11:20:04AM +0100, Claudio Jeker wrote: > > Just commit it. OK claudio@ > > If people see problems we can back it out again. > > This has huge impact on TCP performance. > > http://bluhm.genua.de/perform/results/2021-02-07T00%3A01%3A40Z/perform.html > > For a single TCP connection between to OpenBSD boxes, througput > drops by 77% from 3.1 GBit/sec to 710 MBit/sec. But with 100 > parallel connections the througput over all increases by 5%. For single connections our kernel is limited to sending out at most 4 TCP segments. I didn't see that, because I only measured with 10 and 30 streams in parallel. FreeBSD disabled it 20 years ago. https://github.com/freebsd/freebsd-src/commit/d912c694ee00de5ea0f46743295a0fc603cab562 I would suggest to remove the whole feature. bye, Jan Index: tcp.h === RCS file: /cvs/src/sys/netinet/tcp.h,v retrieving revision 1.21 diff -u -p -r1.21 tcp.h --- tcp.h 10 Jul 2019 18:45:31 - 1.21 +++ tcp.h 8 Feb 2021 17:52:38 - @@ -105,8 +105,6 @@ struct tcphdr { #defineTCP_MAX_SACK3 /* Max # SACKs sent in any segment */ #defineTCP_SACKHOLE_LIMIT 128 /* Max # SACK holes per connection */ -#defineTCP_MAXBURST4 /* Max # packets after leaving Fast Rxmit */ - /* * Default maximum segment size for TCP. * With an IP MSS of 576, this is 536, Index: tcp_output.c === RCS file: /cvs/src/sys/netinet/tcp_output.c,v retrieving revision 1.129 diff -u -p -r1.129 tcp_output.c --- tcp_output.c25 Jan 2021 03:40:46 - 1.129 +++ tcp_output.c8 Feb 2021 17:53:07 - @@ -203,7 +203,6 @@ tcp_output(struct tcpcb *tp) int idle, sendalot = 0; int i, sack_rxmit = 0; struct sackhole *p; - int maxburst = TCP_MAXBURST; #ifdef TCP_SIGNATURE unsigned int sigoff; #endif /* TCP_SIGNATURE */ @@ -1120,7 +1119,7 @@ out: tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~TF_ACKNOW; TCP_TIMER_DISARM(tp, TCPT_DELACK); - if (sendalot && --maxburst) + if (sendalot) goto again; return (0); }