Re: IPv4 on ix(4) slow/nothing - 7.4

2023-10-19 Thread Jan Klemkow
On Wed, Oct 18, 2023 at 08:53:44PM +0200, Alexander Bluhm wrote:
> On Wed, Oct 18, 2023 at 08:19:29PM +0200, Mischa wrote:
> > It's indeed something like that: ix -> vlan (tagged) -> veb
> 
> When vlan is added to veb, kernel should disable LRO on ix.
> All testing before release did not find this code path :-(
> 
> Is it possible to add vlan to veb first, and then add or change the
> vlan parent to ix?  If it works, that should also disable LRO.
> 
> Jan said he will have a look tomorrow.
> 
> trunk, carp, ... in veb or bridge might have the same issue.

First round of fixes for vlan(4), vxlan(4), nvgre(4) and bpe(4).

ok?

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.708
diff -u -p -r1.708 if.c
--- net/if.c16 Sep 2023 09:33:27 -  1.708
+++ net/if.c19 Oct 2023 13:03:33 -
@@ -3243,6 +3243,17 @@ ifsetlro(struct ifnet *ifp, int on)
struct ifreq ifrq;
int error = 0;
int s = splnet();
+   struct if_parent parent;
+
+   memset(&parent, 0, sizeof(parent));
+   if ((*ifp->if_ioctl)(ifp, SIOCGIFPARENT, (caddr_t)&parent) != -1) {
+   struct ifnet *ifp0 = if_unit(parent.ifp_parent);
+
+   if (ifp0 != NULL) {
+   ifsetlro(ifp0, on);
+   if_put(ifp0);
+   }
+   }
 
if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
error = ENOTSUP;
Index: net/if_bpe.c
===
RCS file: /cvs/src/sys/net/if_bpe.c,v
retrieving revision 1.19
diff -u -p -r1.19 if_bpe.c
--- net/if_bpe.c8 Nov 2021 04:54:44 -   1.19
+++ net/if_bpe.c19 Oct 2023 13:20:18 -
@@ -631,6 +631,9 @@ bpe_set_parent(struct bpe_softc *sc, con
goto put;
}
 
+   if (ether_brport_isset(ifp))
+   ifsetlro(ifp0, 0);
+
/* commit */
sc->sc_key.k_if = ifp0->if_index;
etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
Index: net/if_gre.c
===
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.174
diff -u -p -r1.174 if_gre.c
--- net/if_gre.c13 May 2023 13:35:17 -  1.174
+++ net/if_gre.c19 Oct 2023 13:24:56 -
@@ -3544,6 +3544,9 @@ nvgre_set_parent(struct nvgre_softc *sc,
return (EPROTONOSUPPORT);
}
 
+   if (ether_brport_isset(&sc->sc_ac.ac_if))
+   ifsetlro(ifp0, 0);
+
/* commit */
sc->sc_ifp0 = ifp0->if_index;
if_put(ifp0);
Index: net/if_vlan.c
===
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.215
diff -u -p -r1.215 if_vlan.c
--- net/if_vlan.c   16 May 2023 14:32:54 -  1.215
+++ net/if_vlan.c   19 Oct 2023 11:08:23 -
@@ -937,6 +937,9 @@ vlan_set_parent(struct vlan_softc *sc, c
if (error != 0)
goto put;
 
+   if (ether_brport_isset(ifp))
+   ifsetlro(ifp0, 0);
+
/* commit */
sc->sc_ifidx0 = ifp0->if_index;
if (!ISSET(sc->sc_flags, IFVF_LLADDR))
Index: net/if_vxlan.c
===
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.93
diff -u -p -r1.93 if_vxlan.c
--- net/if_vxlan.c  3 Aug 2023 09:49:08 -   1.93
+++ net/if_vxlan.c  19 Oct 2023 13:18:47 -
@@ -1582,6 +1582,9 @@ vxlan_set_parent(struct vxlan_softc *sc,
goto put;
}
 
+   if (ether_brport_isset(ifp))
+   ifsetlro(ifp0, 0);
+
/* commit */
sc->sc_if_index0 = ifp0->if_index;
etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);



TSO for ixl(4)

2023-10-18 Thread Jan Klemkow
Hi,

This diff implements TCP Segmentation Offloading for ixl(4).  I tested
it successfully on amd64 and sparc64 with Intel X710.  It should
increase the TCP bulk performance to 10 Gbit/s.  On sparc64 I got an
increase from 600 MBit/s to 2.000 Gbit/s.

Further testing is welcome.

bye,
Jan

Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.89
diff -u -p -r1.89 if_ixl.c
--- dev/pci/if_ixl.c29 Sep 2023 19:44:47 -  1.89
+++ dev/pci/if_ixl.c18 Oct 2023 15:15:30 -
@@ -71,6 +71,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #if NBPFILTER > 0
@@ -85,6 +86,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -827,6 +830,10 @@ struct ixl_tx_desc {
 #define IXL_TX_DESC_BSIZE_MASK \
(IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT)
 
+#define IXL_TX_CTX_DESC_CMD_TSO0x10
+#define IXL_TX_CTX_DESC_TLEN_SHIFT 30
+#define IXL_TX_CTX_DESC_MSS_SHIFT  50
+
 #define IXL_TX_DESC_L2TAG1_SHIFT   48
 } __packed __aligned(16);
 
@@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 {
uint64_tqword3;
 } __packed __aligned(16);
 
-#define IXL_TX_PKT_DESCS   8
+#define IXL_TX_PKT_DESCS   32
 #define IXL_TX_QUEUE_ALIGN 128
 #define IXL_RX_QUEUE_ALIGN 128
 
 #define IXL_HARDMTU9712 /* 9726 - ETHER_HDR_LEN */
+#define IXL_TSO_SIZE   ((255 * 1024) - 1)
+#define IXL_MAX_DMA_SEG_SIZE   ((16 * 1024) - 1)
+
+/*
+ * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES.
+ * This interface could not handle packets greater than IXL_TSO_SIZE.
+ */
+CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE);
 
 #define IXL_PCIREG PCI_MAPREG_START
 
@@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct
ifp->if_capabilities |= IFCAP_CSUM_IPv4 |
IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+   ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
 
ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status);
 
@@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
txm = &maps[i];
 
if (bus_dmamap_create(sc->sc_dmat,
-   IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0,
+   MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
&txm->txm_map) != 0)
goto uncreate;
@@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 }
 
 static uint64_t
-ixl_tx_setup_offload(struct mbuf *m0)
+ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr,
+unsigned int prod)
 {
struct ether_extracted ext;
uint64_t hlen;
@@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0)
}
 
if (!ISSET(m0->m_pkthdr.csum_flags,
-   M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
+   M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO))
return (offload);
 
ether_extract_headers(m0, &ext);
@@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0)
offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
}
 
+   if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
+   if (ext.tcp) {
+   struct ixl_tx_desc *ring, *txd;
+   uint64_t cmd = 0;
+
+   hlen += ext.tcp->th_off << 2;
+   ring = IXL_DMA_KVA(&txr->txr_mem);
+   txd = &ring[prod];
+
+   cmd |= IXL_TX_DESC_DTYPE_CONTEXT;
+   cmd |= IXL_TX_CTX_DESC_CMD_TSO;
+   cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN
+   - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT;
+   cmd |= (uint64_t)(m0->m_pkthdr.ph_mss)
+   << IXL_TX_CTX_DESC_MSS_SHIFT;
+
+   htolem64(&txd->addr, 0);
+   htolem64(&txd->cmd, cmd);
+   } else
+   tcpstat_inc(tcps_outbadtso);
+   }
+
return (offload);
 }
 
@@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq)
mask = sc->sc_tx_ring_ndescs - 1;
 
for (;;) {
-   if (free <= IXL_TX_PKT_DESCS) {
+   /* We need one extra descriptor for TSO packets. */
+   if (free <= (IXL_TX_PKT_DESCS + 1)) {
ifq_set_oactive(ifq);
break;
}
@@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq)
if (m == NULL)
break;
 
-   offload = ixl_tx_setup_offload(m);
+   offload = ixl_tx_setup_offload(m, txr, prod);
 
txm = &t

fix vlan handling with tcplro on ix(4)

2023-07-26 Thread Jan Klemkow
Hi,

I missed the vlan-tag size in the mss calculation of lro packets in
ix(4).  This diff adds vlan-header detection in ether_extract_headers()
and uses this information to calculate the right mss.

This fixes forwarding of vlan tagged lro packets.

ok?

bye,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.200
diff -u -p -r1.200 if_ix.c
--- dev/pci/if_ix.c 18 Jul 2023 16:01:20 -  1.200
+++ dev/pci/if_ix.c 26 Jul 2023 09:21:15 -
@@ -3275,6 +3275,10 @@ ixgbe_rxeof(struct rx_ring *rxr)
/* Calculate header size. */
ether_extract_headers(sendmp, &ext);
hdrlen = sizeof(*ext.eh);
+#if NVLAN > 0
+   if (ext.evh)
+   hdrlen += ETHER_VLAN_ENCAP_LEN;
+#endif
if (ext.ip4)
hdrlen += ext.ip4->ip_hl << 2;
if (ext.ip6)
Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.290
diff -u -p -r1.290 if_ethersubr.c
--- net/if_ethersubr.c  6 Jul 2023 19:46:53 -   1.290
+++ net/if_ethersubr.c  26 Jul 2023 09:20:57 -
@@ -1040,6 +1040,7 @@ ether_extract_headers(struct mbuf *mp, s
uint64_t hlen;
int  hoff;
uint8_t  ipproto;
+   uint16_t ether_type;
 
/* Return NULL if header was not recognized. */
memset(ext, 0, sizeof(*ext));
@@ -1048,9 +1049,20 @@ ether_extract_headers(struct mbuf *mp, s
return;
 
ext->eh = mtod(mp, struct ether_header *);
-   switch (ntohs(ext->eh->ether_type)) {
+   ether_type = ntohs(ext->eh->ether_type);
+   hlen = sizeof(*ext->eh);
+
+#if NVLAN > 0
+   if (ether_type == ETHERTYPE_VLAN) {
+   ext->evh = mtod(mp, struct ether_vlan_header *);
+   ether_type = ntohs(ext->evh->evl_proto);
+   hlen = sizeof(*ext->evh);
+   }
+#endif
+
+   switch (ether_type) {
case ETHERTYPE_IP:
-   m = m_getptr(mp, sizeof(*ext->eh), &hoff);
+   m = m_getptr(mp, hlen, &hoff);
if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4))
return;
ext->ip4 = (struct ip *)(mtod(m, caddr_t) + hoff);
@@ -1064,7 +1076,7 @@ ether_extract_headers(struct mbuf *mp, s
break;
 #ifdef INET6
case ETHERTYPE_IPV6:
-   m = m_getptr(mp, sizeof(*ext->eh), &hoff);
+   m = m_getptr(mp, hlen, &hoff);
if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6))
return;
ext->ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
Index: netinet/if_ether.h
===
RCS file: /cvs/src/sys/netinet/if_ether.h,v
retrieving revision 1.89
diff -u -p -r1.89 if_ether.h
--- netinet/if_ether.h  6 Jul 2023 19:46:53 -   1.89
+++ netinet/if_ether.h  26 Jul 2023 09:20:22 -
@@ -301,11 +301,12 @@ uint64_t  ether_addr_to_e64(const struct 
 void   ether_e64_to_addr(struct ether_addr *, uint64_t);
 
 struct ether_extracted {
-   struct ether_header *eh;
-   struct ip   *ip4;
-   struct ip6_hdr  *ip6;
-   struct tcphdr   *tcp;
-   struct udphdr   *udp;
+   struct ether_header *eh;
+   struct ether_vlan_header*evh;
+   struct ip   *ip4;
+   struct ip6_hdr  *ip6;
+   struct tcphdr   *tcp;
+   struct udphdr   *udp;
 };
 
 void ether_extract_headers(struct mbuf *, struct ether_extracted *);



ixl(4): protect admin queue with mutex

2023-07-19 Thread Jan Klemkow
Hi,

there is an issue with the admin queue of ixl(4) which leads to the
following panic when the link state changes:

uvm_fault(0x818005f8, 0x18, 0, 2) -> e
kernel: page fault trap, code=0
Stopped at  ixl_intr0+0xca: movq%rdx,0x18(%rax)
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
 392823  13219  00x100040  02  ifstated
 444681  94950 90   0x1100010  06  ospf6d
 428704   9496 90   0x1100010  09  ospf6d
 106020  59273 85   0x1100010  01  ospfd
 420435  72114 85   0x1100010  05  ospfd
 295821  93368 73   0x1100010  03  syslogd
 367116  56598  0 0x14000  0x2007  zerothread
 275385  57815  0 0x14000  0x2004  softnet
ixl_intr0(84509000) at ixl_intr0+0xca
intr_handler(0,844b0b80) at intr_handler+0x5b
Xintr_ioapic_edge25_untramp() at Xintr_ioapic_edge25_untramp+0x18f
acpicpu_idle() at acpicpu_idle+0x1f6
sched_idle(0) at sched_idle+0x280
end trace frame: 0x0, count: 10
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}>

The queue is corrupted in such a way that slot->iaq_cookie is 0, which
causes the uvm fault when iatq is dereferenced.

The following diff uses a mutex to protect the admin queue and avoids
the issue above.

ok?

bye,
Jan

Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.87
diff -u -p -r1.87 if_ixl.c
--- dev/pci/if_ixl.c6 Feb 2023 20:27:45 -   1.87
+++ dev/pci/if_ixl.c19 Jul 2023 07:05:40 -
@@ -1274,6 +1274,7 @@ struct ixl_softc {
unsigned int sc_atq_prod;
unsigned int sc_atq_cons;
 
+   struct mutex sc_atq_mtx;
struct ixl_dmamemsc_arq;
struct task  sc_arq_task;
struct ixl_aq_bufs   sc_arq_idle;
@@ -1723,6 +1724,8 @@ ixl_attach(struct device *parent, struct
 
/* initialise the adminq */
 
+   mtx_init(&sc->sc_atq_mtx, IPL_NET);
+
if (ixl_dmamem_alloc(sc, &sc->sc_atq,
sizeof(struct ixl_aq_desc) * IXL_AQ_NUM, IXL_AQ_ALIGN) != 0) {
printf("\n" "%s: unable to allocate atq\n", DEVNAME(sc));
@@ -3599,6 +3602,8 @@ ixl_atq_post(struct ixl_softc *sc, struc
struct ixl_aq_desc *atq, *slot;
unsigned int prod;
 
+   mtx_enter(&sc->sc_atq_mtx);
+
/* assert locked */
 
atq = IXL_DMA_KVA(&sc->sc_atq);
@@ -3618,6 +3623,8 @@ ixl_atq_post(struct ixl_softc *sc, struc
prod &= IXL_AQ_MASK;
sc->sc_atq_prod = prod;
ixl_wr(sc, sc->sc_aq_regs->atq_tail, prod);
+
+   mtx_leave(&sc->sc_atq_mtx);
 }
 
 static void
@@ -3628,11 +3635,15 @@ ixl_atq_done(struct ixl_softc *sc)
unsigned int cons;
unsigned int prod;
 
+   mtx_enter(&sc->sc_atq_mtx);
+
prod = sc->sc_atq_prod;
cons = sc->sc_atq_cons;
 
-   if (prod == cons)
+   if (prod == cons) {
+   mtx_leave(&sc->sc_atq_mtx);
return;
+   }
 
atq = IXL_DMA_KVA(&sc->sc_atq);
 
@@ -3645,6 +3656,7 @@ ixl_atq_done(struct ixl_softc *sc)
if (!ISSET(slot->iaq_flags, htole16(IXL_AQ_DD)))
break;
 
+   KASSERT(slot->iaq_cookie != 0);
iatq = (struct ixl_atq *)slot->iaq_cookie;
iatq->iatq_desc = *slot;
 
@@ -3661,6 +3673,8 @@ ixl_atq_done(struct ixl_softc *sc)
BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
 
sc->sc_atq_cons = cons;
+
+   mtx_leave(&sc->sc_atq_mtx);
 }
 
 static void
@@ -3691,6 +3705,8 @@ ixl_atq_poll(struct ixl_softc *sc, struc
unsigned int prod;
unsigned int t = 0;
 
+   mtx_enter(&sc->sc_atq_mtx);
+
atq = IXL_DMA_KVA(&sc->sc_atq);
prod = sc->sc_atq_prod;
slot = atq + prod;
@@ -3712,8 +3728,10 @@ ixl_atq_poll(struct ixl_softc *sc, struc
while (ixl_rd(sc, sc->sc_aq_regs->atq_head) != prod) {
delaymsec(1);
 
-   if (t++ > tm)
+   if (t++ > tm) {
+   mtx_leave(&sc->sc_atq_mtx);
return (ETIMEDOUT);
+   }
}
 
bus_dmamap_sync(sc->sc_dmat, IXL_DMA_MAP(&sc->sc_atq),
@@ -3724,6 +3742,7 @@ ixl_atq_poll(struct ixl_softc *sc, struc
 
sc->sc_atq_cons = prod;
 
+   mtx_leave(&sc->sc_atq_mtx);
return (0);
 }
 



Re: tcp lro by default, call for testing

2023-07-10 Thread Jan Klemkow
On Sat, Jul 08, 2023 at 05:15:26PM +0300, Alexander Bluhm wrote:
> I am not aware of any more limitations when enabling LRO for TCP
> in the network drivers.  The feature allows to receive aggregated
> packets larger than the MTU.  Receiving TCP streams becomes much
> faster.
> 
> As the network hardware is not aware whether a packet is received
> locally or forwarded, everything is aggregated.  In case of forwarding
> it is split on output to packets not larger than the original
> packets.  So path MTU discovery should still work.  If the outgoing
> interface supports TSO, the packet is chopped in hardware.
> 
> Currently only ix(4) and lo(4) support LRO, and ix(4) is limited
> to IPv4 and newer than the old 82598 model.  If the interface is
> added to a bridge(4) or aggr(4), LRO is automatically disabled.

I guess you mean veb(4) not aggr(4).  We just avoid the inheritance
of the LRO capability in aggr(4), but we still use the feature.

> So in case you possess any ix(4) hardware or do funky pf routing
> on lo(4) please run this diff.  If you encounter problems, report
> and turn LRO off per interface with ifconfig -tcplro.

Diff looks fine to me.  I just would keep mentioning the default
behavior in the manpage like this:

ok jan@

Index: sbin/ifconfig/ifconfig.8
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.397
diff -u -p -r1.397 ifconfig.8
--- sbin/ifconfig/ifconfig.87 Jun 2023 18:42:40 -   1.397
+++ sbin/ifconfig/ifconfig.810 Jul 2023 11:54:47 -
@@ -517,9 +517,9 @@ It is not possible to use LRO with inter
 or
 .Xr tpmr 4 .
 Changing this option will re-initialize the network interface.
+LRO is enabled by default.
 .It Cm -tcplro
 Disable LRO.
-LRO is disabled by default.
 .It Cm up
 Mark an interface
 .Dq up .


> Index: sys/dev/pci/if_ix.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
> retrieving revision 1.198
> diff -u -p -r1.198 if_ix.c
> --- sys/dev/pci/if_ix.c   8 Jul 2023 09:01:30 -   1.198
> +++ sys/dev/pci/if_ix.c   8 Jul 2023 13:51:26 -
> @@ -1925,8 +1925,10 @@ ixgbe_setup_interface(struct ix_softc *s
>   ifp->if_capabilities |= IFCAP_CSUM_IPv4;
>  
>   ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
> - if (sc->hw.mac.type != ixgbe_mac_82598EB)
> + if (sc->hw.mac.type != ixgbe_mac_82598EB) {
> + ifp->if_xflags |= IFXF_LRO;
>   ifp->if_capabilities |= IFCAP_LRO;
> + }
>  
>   /*
>* Specify the media types supported by this sc and register
> Index: sys/net/if_loop.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
> retrieving revision 1.95
> diff -u -p -r1.95 if_loop.c
> --- sys/net/if_loop.c 2 Jul 2023 19:59:15 -   1.95
> +++ sys/net/if_loop.c 8 Jul 2023 13:51:26 -
> @@ -172,11 +172,11 @@ loop_clone_create(struct if_clone *ifc, 
>   ifp->if_softc = NULL;
>   ifp->if_mtu = LOMTU;
>   ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
> - ifp->if_xflags = IFXF_CLONED;
> + ifp->if_xflags = IFXF_CLONED | IFXF_LRO;
>   ifp->if_capabilities = IFCAP_CSUM_IPv4 |
>   IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
>   IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 |
> - IFCAP_LRO;
> + IFCAP_LRO | IFCAP_TSOv4 | IFCAP_TSOv6;
>   ifp->if_rtrequest = lortrequest;
>   ifp->if_ioctl = loioctl;
>   ifp->if_input = loinput;
> Index: sbin/ifconfig/ifconfig.8
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/ifconfig/ifconfig.8,v
> retrieving revision 1.397
> diff -u -p -r1.397 ifconfig.8
> --- sbin/ifconfig/ifconfig.8  7 Jun 2023 18:42:40 -   1.397
> +++ sbin/ifconfig/ifconfig.8  7 Jul 2023 19:57:09 -
> @@ -519,7 +519,6 @@ or
>  Changing this option will re-initialize the network interface.
>  .It Cm -tcplro
>  Disable LRO.
> -LRO is disabled by default.
>  .It Cm up
>  Mark an interface
>  .Dq up .
> 



Re: tcp lro tso path mtu

2023-07-06 Thread Jan Klemkow
On Thu, Jul 06, 2023 at 10:19:21PM +0300, Alexander Bluhm wrote:
> On Thu, Jul 06, 2023 at 08:49:03PM +0200, Jan Klemkow wrote:
> > > @@ -109,6 +109,9 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > 
> > I think this is a merge bug, isn't it?
> > 
> > > +#include 
> > > +#include 
> > > +#include 
> 
> Right.
> 
> > > + error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
> > > + if (error || *mp == NULL)
> > > + return error;
> > > +
> > > + if ((*mp)->m_pkthdr.len <= mtu) {
> > 
> > I may miss something but...
> > 
> > Couldn't you move the *_cksum_out() calls above the upper
> > tcp_if_output_tso() call?  And then remove the *_cksum_out() calls
> > inside of tcp_if_output_tso()?
> > 
> > Thus, there is just one place where we call them.
> > 
> > > + switch (dst->sa_family) {
> > > + case AF_INET:
> > > + in_hdr_cksum_out(*mp, ifp);
> > > + in_proto_cksum_out(*mp, ifp);
> > > + break;
> > > +#ifdef INET6
> > > + case AF_INET6:
> > > + in6_proto_cksum_out(*mp, ifp);
> > > + break;
> > > +#endif
> 
> There is the case in tcp_if_output_tso() where we call tcp_chopper().
> Then checksum has to be calculated after chopping.  If I do it
> always before tcp_if_output_tso(), we may calculate it twice.  Once
> for the large packet and once for the small ones.
> 
> New diff without duplicate includes.

tested with v4/v6, direct and forwarding.

ok jan@

> Index: net/if.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
> retrieving revision 1.704
> diff -u -p -r1.704 if.c
> --- net/if.c  6 Jul 2023 04:55:04 -   1.704
> +++ net/if.c  6 Jul 2023 19:15:00 -
> @@ -886,6 +886,57 @@ if_output_ml(struct ifnet *ifp, struct m
>  }
>  
>  int
> +if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
> +struct rtentry *rt, u_int mtu)
> +{
> + uint32_t ifcap;
> + int error;
> +
> + switch (dst->sa_family) {
> + case AF_INET:
> + ifcap = IFCAP_TSOv4;
> + break;
> +#ifdef INET6
> + case AF_INET6:
> + ifcap = IFCAP_TSOv6;
> + break;
> +#endif
> + default:
> + unhandled_af(dst->sa_family);
> + }
> +
> + /*
> +  * Try to send with TSO first.  When forwarding LRO may set
> +  * maximium segment size in mbuf header.  Chop TCP segment
> +  * even if it would fit interface MTU to preserve maximum
> +  * path MTU.
> +  */
> + error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
> + if (error || *mp == NULL)
> + return error;
> +
> + if ((*mp)->m_pkthdr.len <= mtu) {
> + switch (dst->sa_family) {
> + case AF_INET:
> + in_hdr_cksum_out(*mp, ifp);
> + in_proto_cksum_out(*mp, ifp);
> + break;
> +#ifdef INET6
> + case AF_INET6:
> + in6_proto_cksum_out(*mp, ifp);
> + break;
> +#endif
> + }
> + error = ifp->if_output(ifp, *mp, dst, rt);
> + *mp = NULL;
> + return error;
> + }
> +
> + /* mp still contains mbuf that has to be fragmented or dropped. */
> + return 0;
> +}
> +
> +int
>  if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
>  struct sockaddr *dst, struct rtentry *rt)
>  {
> Index: net/if_var.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
> retrieving revision 1.128
> diff -u -p -r1.128 if_var.h
> --- net/if_var.h  28 Jun 2023 11:49:49 -  1.128
> +++ net/if_var.h  6 Jul 2023 19:12:39 -
> @@ -329,6 +329,8 @@ int   if_output_ml(struct ifnet *, struct 
>   struct sockaddr *, struct rtentry *);
>  int  if_output_mq(struct ifnet *, struct mbuf_queue *, unsigned int *,
>   struct sockaddr *, struct rtentry *);
> +int  if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *,
> + struct rtentry *, u_int);
>  int  if_output_local(struct ifnet *, struct mbuf *, sa_family_t);
>  void if_rtrequest_dummy(struct ifnet *, int, struct rtentry *);
>  void p2p_rtrequest(struct ifnet *, int, struct rtentry *);
> Index: net/pf.c
> 

Re: tcp lro tso path mtu

2023-07-06 Thread Jan Klemkow
On Mon, Jul 03, 2023 at 08:04:11PM +0300, Alexander Bluhm wrote:
> As final step before making LRO (Large Receive Offload) the default,
> we have to fix path MTU discovery when forwarding.
> 
> The drivers, currently ix(4) and lo(4) only, record an upper bound
> of the size of the original packets in ph_mss.  When sending we
> must chop the packets with TSO (TCP Segmentation Offload) to that
> size.  That means we have to call tcp_if_output_tso() before
> ifp->if_output().  I have put that logic into if_output_tso() to
> avoid code duplication.
> 
> ok?

I like the idea of this commit.  Some comments below.

Thanks,
Jan

> Index: net/if.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
> retrieving revision 1.702
> diff -u -p -r1.702 if.c
> --- net/if.c  2 Jul 2023 19:59:15 -   1.702
> +++ net/if.c  3 Jul 2023 10:28:30 -
> @@ -109,6 +109,9 @@
>  #include 
>  #include 
>  #include 

I think this is a merge bug, isn't it?

> +#include 
> +#include 
> +#include 

> @@ -883,6 +886,57 @@ if_output_ml(struct ifnet *ifp, struct m
>   ml_purge(ml);
>  
>   return error;
> +}
> +
> +int
> +if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
> +struct rtentry *rt, u_int mtu)
> +{
> + uint32_t ifcap;
> + int error;
> +
> + switch (dst->sa_family) {
> + case AF_INET:
> + ifcap = IFCAP_TSOv4;
> + break;
> +#ifdef INET6
> + case AF_INET6:
> + ifcap = IFCAP_TSOv6;
> + break;
> +#endif
> + default:
> + unhandled_af(dst->sa_family);
> + }
> +
> + /*
> +  * Try to send with TSO first.  When forwarding LRO may set
> +  * maximium segment size in mbuf header.  Chop TCP segment
> +  * even if it would fit interface MTU to preserve maximum
> +  * path MTU.
> +  */
> + error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
> + if (error || *mp == NULL)
> + return error;
> +
> + if ((*mp)->m_pkthdr.len <= mtu) {

I may miss something but...

Couldn't you move the *_cksum_out() calls above the upper
tcp_if_output_tso() call?  And then remove the *_cksum_out() calls
inside of tcp_if_output_tso()?

Thus, there is just one place where we call them.

> + switch (dst->sa_family) {
> + case AF_INET:
> + in_hdr_cksum_out(*mp, ifp);
> + in_proto_cksum_out(*mp, ifp);
> + break;
> +#ifdef INET6
> + case AF_INET6:
> + in6_proto_cksum_out(*mp, ifp);
> + break;
> +#endif
> + }
> + error = ifp->if_output(ifp, *mp, dst, rt);
> + *mp = NULL;
> + return error;
> + }
> +
> + /* mp still contains mbuf that has to be fragmented or dropped. */
> + return 0;
>  }



Add ethernet type check in ifsetlro()

2023-07-03 Thread Jan Klemkow
Hi,

bluhm pointed out that the ether_brport_isset() check is only allowed on
ethernet devices.  Thus, I put an additional ethernet check in the
condition.  This also fixes EBUSY errors of "ifconfig lo0 tcplro" calls
in my setup.

ok?

bye,
Jan

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.702
diff -u -p -r1.702 if.c
--- net/if.c2 Jul 2023 19:59:15 -   1.702
+++ net/if.c3 Jul 2023 20:58:32 -
@@ -3206,7 +3206,7 @@ ifsetlro(struct ifnet *ifp, int on)
KERNEL_ASSERT_LOCKED(); /* for if_flags */
 
if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
-   if (ether_brport_isset(ifp)) {
+   if (ifp->if_type == IFT_ETHER && ether_brport_isset(ifp)) {
error = EBUSY;
goto out;
}



Re: lo(4) loopback LRO and TSO

2023-07-02 Thread Jan Klemkow



On July 2, 2023 2:33:41 PM GMT+02:00, Claudio Jeker  
wrote:
>On Sun, Jul 02, 2023 at 02:28:17PM +0200, Alexander Bluhm wrote:
>> anyone?
>
>Was not able to test yet but I like the diff.
>Right now this is a noop since LRO is not on by default for lo(4).
>Because of that OK claudio@

The diff works fine in my sparc64 setup.
ok jan@

>> On Fri, Jun 23, 2023 at 06:06:16PM +0200, Alexander Bluhm wrote:
>> > Hi,
>> > 
>> > Claudio@ mentioned the idea to use TSO and LRO on the loopback
>> > interface to transfer TCP faster.
>> > 
>> > I see a performance effect with this diff, but more importantly it
>> > gives us more test coverage.  Currently LRO on lo(4) is default
>> > off.
>> > 
>> > Future plan is:
>> > - Fix some corner cases for LRO/TSO with TCP path-MTU discovery
>> >   and IP forwarding when LRO is enabled.
>> > - Enable LRO/TSO for lo(4) and ix(4) per default.
>> > - Jan@ commits his ixl(4) TSO diff.
>> > 
>> > ok for lo(4) LRO/TSO with default off?
>> > 
>> > bluhm
>> > 
>> > Index: sys/net/if.c
>> > ===
>> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
>> > retrieving revision 1.700
>> > diff -u -p -r1.700 if.c
>> > --- sys/net/if.c   12 Jun 2023 21:19:54 -  1.700
>> > +++ sys/net/if.c   23 Jun 2023 15:48:27 -
>> > @@ -106,6 +106,9 @@
>> >  #ifdef MROUTING
>> >  #include 
>> >  #endif
>> > +#include 
>> > +#include 
>> > +#include 
>> >  
>> >  #ifdef INET6
>> >  #include 
>> > @@ -802,12 +805,29 @@ if_input_local(struct ifnet *ifp, struct
>> > * is now incorrect, will be calculated before sending.
>> > */
>> >keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
>> > -  M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT);
>> > +  M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
>> > +  M_TCP_TSO);
>> >m_resethdr(m);
>> >m->m_flags |= M_LOOP | keepflags;
>> >m->m_pkthdr.csum_flags = keepcksum;
>> >m->m_pkthdr.ph_ifidx = ifp->if_index;
>> >m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
>> > +
>> > +  if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
>> > +  if (ifp->if_mtu > 0 &&
>> > +  ((af == AF_INET &&
>> > +  ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
>> > +  (af == AF_INET6 &&
>> > +  ISSET(ifp->if_capabilities, IFCAP_TSOv6)))) {
>> > +  tcpstat_inc(tcps_inswlro);
>> > +  tcpstat_add(tcps_inpktlro,
>> > +  (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu);
>> > +  } else {
>> > +  tcpstat_inc(tcps_inbadlro);
>> > +  m_freem(m);
>> > +  return (EPROTONOSUPPORT);
>> > +  }
>> > +  }
>> >  
>> >if (ISSET(keepcksum, M_TCP_CSUM_OUT))
>> >m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
>> > Index: sys/net/if_loop.c
>> > ===
>> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
>> > retrieving revision 1.94
>> > diff -u -p -r1.94 if_loop.c
>> > --- sys/net/if_loop.c  5 Jun 2023 11:35:46 -   1.94
>> > +++ sys/net/if_loop.c  23 Jun 2023 15:48:27 -
>> > @@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, 
>> >ifp->if_xflags = IFXF_CLONED;
>> >ifp->if_capabilities = IFCAP_CSUM_IPv4 |
>> >IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
>> > -  IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
>> > +  IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 |
>> > +  IFCAP_LRO;
>> >ifp->if_rtrequest = lortrequest;
>> >ifp->if_ioctl = loioctl;
>> >ifp->if_input = loinput;
>> > @@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, c
>> >  
>> >switch (cmd) {
>> >case SIOCSIFFLAGS:
>> > +  if (ISSET(ifp->if_xflags, IFXF_LRO))
>> > +  SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6);
>> > +  else
>> > +  CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6);
>> >break;
>> >  
>> >case SIOCSIFADDR:
>> > Index: sys/netinet/tcp_usrreq.c
>> > ===
>> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
>> > retrieving revision 1.219
>> > diff -u -p -r1.219 tcp_usrreq.c
>> > --- sys/netinet/tcp_usrreq.c   23 May 2023 09:16:16 -  1.219
>> > +++ sys/netinet/tcp_usrreq.c   23 Jun 2023 15:48:27 -
>> > @@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o
>> >ASSIGN(tcps_outhwtso);
>> >ASSIGN(tcps_outpkttso);
>> >ASSIGN(tcps_outbadtso);
>> > +  ASSIGN(tcps_inswlro);
>> >ASSIGN(tcps_inhwlro);
>> >ASSIGN(tcps_inpktlro);
>> >ASSIGN(tcps_inbadlro);
>> > Index: sys/netinet/tcp_var.h
>> > ===
>> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
>> > retrieving revision 1.167
>> > diff -u -p -r1.167 tcp_var.h
>> > 

Re: tso ip6 forward

2023-06-16 Thread Jan Klemkow
On Fri, Jun 16, 2023 at 12:06:08PM +0200, Alexander Bluhm wrote:
> On Mon, Jun 12, 2023 at 03:46:28PM +0200, Alexander Bluhm wrote:
> > I found a little inconsistency in IPv6 forwarding with TSO.
> > 
> > Sending with TSO should only be done if the large packet does not fit
> > in the interface MTU.  In case tcp_if_output_tso() does not process
> > the packet, we should send an ICMP6 error.  Rearrange the code that
> > it looks more like other calls to tcp_if_output_tso().
> > 
> > All these cases can only be reached when LRO is turned on for IPv6
> > which none of our drivers currently supports.
> 
> jan@ pointed out that reordering TSO in ip6 forward breaks path MTU
> discovery.  So let's only fix the forward counters, icmp6 packet too
> big and icmp6 redirect.
> 
> First try to send with TSO.  The goto senderr handles icmp6 redirect
> and other errors.
> 
> If TSO is not necessary and the interface MTU fits, just send the
> packet.  Again goto senderr handles icmp6.
> 
> Finally care about icmp6 packet too big.

Works fine in my setup.

> ok?

ok jan@

> Index: netinet6/ip6_forward.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> retrieving revision 1.110
> diff -u -p -r1.110 ip6_forward.c
> --- netinet6/ip6_forward.c1 Jun 2023 09:05:33 -   1.110
> +++ netinet6/ip6_forward.c16 Jun 2023 08:55:43 -
> @@ -321,35 +321,30 @@ reroute:
>  
>   error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6,
>   ifp->if_mtu);
> + if (error)
> + ip6stat_inc(ip6s_cantforward);
> + else if (m == NULL)
> + ip6stat_inc(ip6s_forward);
>   if (error || m == NULL)
> - goto freecopy;
> + goto senderr;
>  
>   /* Check the size after pf_test to give pf a chance to refragment. */
> - if (m->m_pkthdr.len > ifp->if_mtu) {
> - if (mcopy)
> - icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0,
> - ifp->if_mtu);
> - m_freem(m);
> - goto out;
> + if (m->m_pkthdr.len <= ifp->if_mtu) {
> + in6_proto_cksum_out(m, ifp);
> + error = ifp->if_output(ifp, m, sin6tosa(sin6), rt);
> + if (error)
> + ip6stat_inc(ip6s_cantforward);
> + else
> + ip6stat_inc(ip6s_forward);
> + goto senderr;
>   }
>  
> - in6_proto_cksum_out(m, ifp);
> - error = ifp->if_output(ifp, m, sin6tosa(sin6), rt);
> - if (error) {
> - ip6stat_inc(ip6s_cantforward);
> - } else {
> - ip6stat_inc(ip6s_forward);
> - if (type)
> - ip6stat_inc(ip6s_redirectsent);
> - else {
> - if (mcopy)
> - goto freecopy;
> - }
> - }
> + if (mcopy != NULL)
> + icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
> + m_freem(m);
> + goto out;
>  
> -#if NPF > 0 || defined(IPSEC)
>  senderr:
> -#endif
>   if (mcopy == NULL)
>   goto out;
>  
> @@ -357,6 +352,7 @@ senderr:
>   case 0:
>   if (type == ND_REDIRECT) {
>   icmp6_redirect_output(mcopy, rt);
> + ip6stat_inc(ip6s_redirectsent);
>   goto out;
>   }
>   goto freecopy;
> 



Re: ix(4): allocate less memory for tx buffers

2023-06-09 Thread Jan Klemkow
On Fri, Jun 09, 2023 at 06:59:57PM +0200, Jan Klemkow wrote:
> On Fri, Jun 09, 2023 at 06:11:38PM +0200, Jan Klemkow wrote:
> > TSO packets are limited to MAXMCLBYTES (64k).  Thus, we don't need to
> > allocate IXGBE_TSO_SIZE (256k) per packet for the transmit buffers.
> > 
> > This saves 3/4 of the memory and allows me to pack over 8 ix(4) ports
> > into one machine.  Otherwise I run out of devbuf in malloc(9).
> 
> fix typo in comment

Use a more precise compare in the CTASSERT condition.

ok?

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.197
diff -u -p -r1.197 if_ix.c
--- dev/pci/if_ix.c 1 Jun 2023 09:05:33 -   1.197
+++ dev/pci/if_ix.c 9 Jun 2023 16:01:18 -
@@ -37,6 +37,12 @@
 #include 
 #include 
 
+/*
+ * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES.
+ * This interface could not handle packets greater than IXGBE_TSO_SIZE.
+ */
+CTASSERT(MAXMCLBYTES <= IXGBE_TSO_SIZE);
+
 /*
  *  Driver version
  */
@@ -2263,7 +2269,7 @@ ixgbe_allocate_transmit_buffers(struct t
/* Create the descriptor buffer dma maps */
for (i = 0; i < sc->num_tx_desc; i++) {
txbuf = &txr->tx_buffers[i];
-   error = bus_dmamap_create(txr->txdma.dma_tag, IXGBE_TSO_SIZE,
+   error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES,
sc->num_segs, PAGE_SIZE, 0,
BUS_DMA_NOWAIT, &txbuf->map);
 



Re: ix(4): allocate less memory for tx buffers

2023-06-09 Thread Jan Klemkow
On Fri, Jun 09, 2023 at 06:11:38PM +0200, Jan Klemkow wrote:
> TSO packets are limited to MAXMCLBYTES (64k).  Thus, we don't need to
> allocate IXGBE_TSO_SIZE (256k) per packet for the transmit buffers.
> 
> This saves 3/4 of the memory and allows me to pack over 8 ix(8) ports
> into one machine.  Otherwise I run out of devbuf in malloc(9).
> 
> ok?

fix typo in comment

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.197
diff -u -p -r1.197 if_ix.c
--- dev/pci/if_ix.c 1 Jun 2023 09:05:33 -   1.197
+++ dev/pci/if_ix.c 9 Jun 2023 16:01:18 -
@@ -37,6 +37,12 @@
 #include 
 #include 
 
+/*
+ * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES.
+ * This interface could not handle packets greater than IXGBE_TSO_SIZE.
+ */
+CTASSERT(MAXMCLBYTES < IXGBE_TSO_SIZE);
+
 /*
  *  Driver version
  */
@@ -2263,7 +2269,7 @@ ixgbe_allocate_transmit_buffers(struct t
/* Create the descriptor buffer dma maps */
for (i = 0; i < sc->num_tx_desc; i++) {
txbuf = &txr->tx_buffers[i];
-   error = bus_dmamap_create(txr->txdma.dma_tag, IXGBE_TSO_SIZE,
+   error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES,
sc->num_segs, PAGE_SIZE, 0,
BUS_DMA_NOWAIT, &txbuf->map);
 



ix(4): allocate less memory for tx buffers

2023-06-09 Thread Jan Klemkow
Hi,

TSO packets are limited to MAXMCLBYTES (64k).  Thus, we don't need to
allocate IXGBE_TSO_SIZE (256k) per packet for the transmit buffers.

This saves 3/4 of the memory and allows me to pack over 8 ix(4) ports
into one machine.  Otherwise I run out of devbuf in malloc(9).

ok?

bye,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.197
diff -u -p -r1.197 if_ix.c
--- dev/pci/if_ix.c 1 Jun 2023 09:05:33 -   1.197
+++ dev/pci/if_ix.c 9 Jun 2023 16:01:18 -
@@ -37,6 +37,12 @@
 #include 
 #include 
 
+/*
+ * Our TCP/IP Stack could not handle packets greater then MAXMCLBYTES.
+ * This interface could not handle packets greater then IXGBE_TSO_SIZE.
+ */
+CTASSERT(MAXMCLBYTES < IXGBE_TSO_SIZE);
+
 /*
  *  Driver version
  */
@@ -2263,7 +2269,7 @@ ixgbe_allocate_transmit_buffers(struct t
/* Create the descriptor buffer dma maps */
for (i = 0; i < sc->num_tx_desc; i++) {
txbuf = &txr->tx_buffers[i];
-   error = bus_dmamap_create(txr->txdma.dma_tag, IXGBE_TSO_SIZE,
+   error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES,
sc->num_segs, PAGE_SIZE, 0,
BUS_DMA_NOWAIT, &txbuf->map);
 



Re: ifconfig rename tcplro

2023-06-07 Thread Jan Klemkow
On Wed, Jun 07, 2023 at 02:49:07PM +0300, Vitaliy Makkoveev wrote:
> On Wed, Jun 07, 2023 at 01:29:09PM +0200, Alexander Bluhm wrote:
> > On Wed, Jun 07, 2023 at 12:59:11PM +0300, Vitaliy Makkoveev wrote:
> > > On Wed, Jun 07, 2023 at 10:19:32AM +1000, David Gwynne wrote:
> > > > 
> > > > 
> > > > > On 7 Jun 2023, at 06:33, Vitaliy Makkoveev  wrote:
> > > > > 
> > > > >> On 6 Jun 2023, at 20:29, Alexander Bluhm  
> > > > >> wrote:
> > > > >> 
> > > > >> On Tue, Jun 06, 2023 at 05:54:31PM +0300, Vitaliy Makkoveev wrote:
> > > > >>> On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote:
> > > >  Hi,
> > > >  
> > > >  I would suggest to rename ifconfig tcprecvoffload to tcplro.  Maybe
> > > >  it's just because I had to type that long name too often.
> > > >  
> > > >  With that we have consistent naming:
> > > >  # ifconfig ix0 tcplro
> > > >  # sysctl net.inet.tcp.tso=1
> > > >  
> > > >  Also the coresponding flag are named LRO.
> > > >  # ifconfig ix1 hwfeatures
> > > >  ix1: flags=2008843 mtu 
> > > >  1500
> > > >    
> > > >  hwfeatures=71b7
> > > >   hardmtu 9198
> > > >  
> > > >  The feature is quite new, so I have no backward compatiblity 
> > > >  concerns.
> > > >  
> > > >  ok?
> > > >  
> > > > >>> 
> > > > >>> Could you name it "lro" like FreeBSD uses?
> > > > >> 
> > > > >> When I started with this, LRO and TSO were unknown to me.  So with
> > > > >> TCP prefix it may be clearer to users where the feature belongs.
> > > > >> 
> > > > >> Naming is hard.
> > > > > 
> > > > > Yeah, naming is definitely hard. I propose to use lro because it is
> > > > > already used for the same purpose by FreeBSD, so the same name helps
> > > > > to avoid confusion.
> > > > > 
> > > > >lro If the driver supports tcp(4) large receive offloading,
> > > > >enable LRO on the interface.
> > > > > 
> > > > > Also, we have used "tso" keyword for tcp segmentation offloading for
> > > > > the same reason, until it became global net.inet.tcp.tso.
> > > > 
> > > > Is it going to be used to enable lro for udp and other protocols as 
> > > > well?
> > > 
> > > Why not? We have tso feature system wide, so why don't have receive
> > > offloading feature global for all supported protocols? Especially since
> > > I suspect this control will be moved from ifconfig to global
> > > net.inet.tcp.lro like net.inet.tcp.tso.
> > 
> > Maybe we can make lro the default, and then move it to net.inet.tcp.lro.
> > But I like to see another driver to implement it first.
> > 
> > > However, I'm not the fan of original "tcprecvoffload" and like shorter
> > > naming.
> > 
> > Can we use ifconfig tcplro for now?
> > + it only affects TCP
> > + user see that it is related to TCP
> > + it is not a 3 letter abrevation claudio does not like
> > + it is shorter than tcprecvoffload
> > 
> > cons
> > - FreeBSD calls it lro
> > 
> 
> Feel free to use tcplro.

Do so.  OK jan@



Re: ifconfig rename tcplro

2023-06-06 Thread Jan Klemkow
On Tue, Jun 06, 2023 at 09:37:22AM -0700, Chris Cappuccio wrote:
> Jan Klemkow [j.klem...@wemelug.de] wrote:
> > On Tue, Jun 06, 2023 at 05:54:31PM +0300, Vitaliy Makkoveev wrote:
> > > On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote:
> > > > I would suggest to rename ifconfig tcprecvoffload to tcplro.  Maybe
> > > > it's just because I had to type that long name too often.
> > > > 
> > > > With that we have consistent naming:
> > > > # ifconfig ix0 tcplro
> > > > # sysctl net.inet.tcp.tso=1
> > > > 
> > > > Also the coresponding flag are named LRO.
> > > > # ifconfig ix1 hwfeatures
> > > > ix1: flags=2008843 mtu 1500
> > > > 
> > > > hwfeatures=71b7
> > > >  hardmtu 9198
> > > > 
> > > > The feature is quite new, so I have no backward compatiblity concerns.
> > > > 
> > > > ok?
> > > 
> > > Could you name it "lro" like FreeBSD uses?
> > 
> > I also would prefer this one.
> 
> and tcpsendoffload back to tso ?
> 
> was the reason for changing it from tso due to the initial conflation
> of TSO and LRO in the tree?

Yes.  At the start of this, I just wanted to keep it simple with one
ifconfig option "tso".  But, tso is now default in tcp_output() and
can be controlled globally via sysctl(2) net.inet.tcp.tso.  Thus, we
just need to control LRO per interface.



Re: ifconfig rename tcplro

2023-06-06 Thread Jan Klemkow
On Tue, Jun 06, 2023 at 05:54:31PM +0300, Vitaliy Makkoveev wrote:
> On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote:
> > I would suggest to rename ifconfig tcprecvoffload to tcplro.  Maybe
> > it's just because I had to type that long name too often.
> > 
> > With that we have consistent naming:
> > # ifconfig ix0 tcplro
> > # sysctl net.inet.tcp.tso=1
> > 
> > Also the coresponding flag are named LRO.
> > # ifconfig ix1 hwfeatures
> > ix1: flags=2008843 mtu 1500
> > 
> > hwfeatures=71b7
> >  hardmtu 9198
> > 
> > The feature is quite new, so I have no backward compatiblity concerns.
> > 
> > ok?
> 
> Could you name it "lro" like FreeBSD uses?

I also would prefer this one.



Re: ifconfig rename tcplro

2023-06-06 Thread Jan Klemkow
On Tue, Jun 06, 2023 at 02:31:52PM +0200, Alexander Bluhm wrote:
> I would suggest to rename ifconfig tcprecvoffload to tcplro.  Maybe
> it's just because I had to type that long name too often.
> 
> With that we have consistent naming:
> # ifconfig ix0 tcplro
> # sysctl net.inet.tcp.tso=1
> 
> Also the coresponding flag are named LRO.
> # ifconfig ix1 hwfeatures
> ix1: flags=2008843 mtu 1500
> 
> hwfeatures=71b7
>  hardmtu 9198
> 
> The feature is quite new, so I have no backward compatiblity concerns.
> 
> ok?

I like this shorter naming.
It's OK from my side.

> Index: sbin/ifconfig/ifconfig.8
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/ifconfig/ifconfig.8,v
> retrieving revision 1.396
> diff -u -p -r1.396 ifconfig.8
> --- sbin/ifconfig/ifconfig.8  1 Jun 2023 18:57:53 -   1.396
> +++ sbin/ifconfig/ifconfig.8  6 Jun 2023 12:18:07 -
> @@ -501,7 +501,7 @@ Query and display information and diagno
>  modules installed in an interface.
>  It is only supported by drivers implementing the necessary functionality
>  on hardware which supports it.
> -.It Cm tcprecvoffload
> +.It Cm tcplro
>  Enable TCP large receive offload (LRO) if it's supported by the hardware; see
>  .Cm hwfeatures .
>  LRO enabled network interfaces modify received TCP/IP packets.
> @@ -517,7 +517,7 @@ It is not possible to use LRO with inter
>  or
>  .Xr tpmr 4 .
>  Changing this option will re-initialize the network interface.
> -.It Cm -tcprecvoffload
> +.It Cm -tcplro
>  Disable LRO.
>  LRO is disabled by default.
>  .It Cm up
> Index: sbin/ifconfig/ifconfig.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/ifconfig/ifconfig.c,v
> retrieving revision 1.465
> diff -u -p -r1.465 ifconfig.c
> --- sbin/ifconfig/ifconfig.c  1 Jun 2023 18:57:54 -   1.465
> +++ sbin/ifconfig/ifconfig.c  6 Jun 2023 12:18:59 -
> @@ -471,8 +471,8 @@ const struct  cmd {
>   { "-soii",  IFXF_INET6_NOSOII,  0,  setifxflags },
>   { "monitor",IFXF_MONITOR,   0,  setifxflags },
>   { "-monitor",   -IFXF_MONITOR,  0,  setifxflags },
> - { "tcprecvoffload", IFXF_LRO,   0,  setifxflags },
> - { "-tcprecvoffload", -IFXF_LRO, 0,  setifxflags },
> + { "tcplro", IFXF_LRO,   0,  setifxflags },
> + { "-tcplro",-IFXF_LRO,  0,  setifxflags },
>  #ifndef SMALL
>   { "hwfeatures", NEXTARG0,   0,  printifhwfeatures },
>   { "metric", NEXTARG,0,  setifmetric },
> 



Re: Virtio fix for testing

2023-05-26 Thread Jan Klemkow
On Wed, May 24, 2023 at 08:50:26PM +0200, Stefan Fritsch wrote:
> I forgot to mention that no stress test is necessary. If it boots and the
> virtio devices work at all, that should be enough.

Works for me on Linux/KVM with the following devices:

vga1 at pci0 dev 2 function 0 "Qumranet Virtio 1.x GPU" rev 0x01
virtio0 at pci0 dev 4 function 0 "Qumranet Virtio Storage" rev 0x00
virtio1 at pci0 dev 6 function 0 "Qumranet Virtio Console" rev 0x00
virtio2 at pci0 dev 7 function 0 "Qumranet Virtio Memory Balloon" rev 0x00
virtio3 at pci0 dev 8 function 0 "Qumranet Virtio Network" rev 0x00

and on OpenBSD/VMM with:

virtio0 at pci0 dev 1 function 0 "Qumranet Virtio RNG" rev 0x00
virtio1 at pci0 dev 2 function 0 "Qumranet Virtio Network" rev 0x00
virtio2 at pci0 dev 3 function 0 "Qumranet Virtio Storage" rev 0x00
virtio3 at pci0 dev 4 function 0 "Qumranet Virtio SCSI" rev 0x00

Thanks,
Jan



Re: ix(4): LRO forwarding

2023-05-25 Thread Jan Klemkow
On Wed, May 24, 2023 at 05:28:58PM +0200, Alexander Bluhm wrote:
> On Tue, May 23, 2023 at 02:14:57PM +0200, Jan Klemkow wrote:
> > Hi,
> > 
> > This diff sets needed offloading flags and the calculated mss to LRO
> > mbufs in ix(4).  Thus, we can forward this packets and process them via
> > tcp_if_output_tso().  This diff also uses tcp_if_output_tso() in
> > ip6_forward().
> > 
> > I tested the ip6_forward path via the address family transition in pf:
> > 
> > pass in inet from 192.168.1.1 to 192.168.13.2 af-to \
> > inet6 from fc00:13::1 to fc00:13::2
> > 
> > ok?
> 
> crashes during my tests with lro turned on.  Looks like devision
> by zero.

I added a check that avoids setting the TSO flags if mss is zero.  Thus, we
avoid a division by zero in later TSO processing.

ok?

Thanks,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.196
diff -u -p -r1.196 if_ix.c
--- dev/pci/if_ix.c 23 May 2023 09:16:16 -  1.196
+++ dev/pci/if_ix.c 25 May 2023 20:02:06 -
@@ -3257,13 +3257,40 @@ ixgbe_rxeof(struct rx_ring *rxr)
 
if (sendmp->m_pkthdr.ph_mss > 0) {
struct ether_extracted ext;
+   uint64_t hlen;
uint16_t pkts = sendmp->m_pkthdr.ph_mss;
 
+   /* Calculate header size. */
ether_extract_headers(sendmp, &ext);
-   if (ext.tcp)
+   hlen = sizeof(*ext.eh);
+   if (ext.ip4) {
+   hlen += ext.ip4->ip_hl << 2;
+   } else if (ext.ip6) {
+   if (ext.ip6->ip6_nxt == IPPROTO_TCP)
+   hlen += sizeof(*ext.ip6);
+   else
+   tcpstat_inc(tcps_inbadlro);
+   }
+   if (ext.tcp) {
tcpstat_inc(tcps_inhwlro);
-   else
+   hlen += ext.tcp->th_off << 2;
+   } else {
tcpstat_inc(tcps_inbadlro);
+   }
+
+   /*
+* If we gonna forward this packet, we have to
+* mark it as TSO, recalculate the TCP checksum
+* and set a correct mss.
+*/
+   sendmp->m_pkthdr.ph_mss =
+   (sendmp->m_pkthdr.len - hlen) / pkts;
+
+   if (sendmp->m_pkthdr.ph_mss != 0) {
+   SET(sendmp->m_pkthdr.csum_flags,
+   M_TCP_CSUM_OUT | M_TCP_TSO);
+   }
+
tcpstat_add(tcps_inpktlro, pkts);
}
 
Index: netinet6/ip6_forward.c
===
RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v
retrieving revision 1.109
diff -u -p -r1.109 ip6_forward.c
--- netinet6/ip6_forward.c  5 Apr 2023 13:56:31 -   1.109
+++ netinet6/ip6_forward.c  25 May 2023 20:03:06 -
@@ -63,8 +63,10 @@
 #include 
 #include 
 #include 
-#include 
 #endif
+#include 
+#include 
+#include 
 
 /*
  * Forward a packet.  If some error occurs return the sender
@@ -316,7 +318,11 @@ reroute:
goto reroute;
}
 #endif
-   in6_proto_cksum_out(m, ifp);
+
+   error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6,
+   ifp->if_mtu);
+   if (error || m == NULL)
+   goto freecopy;
 
/* Check the size after pf_test to give pf a chance to refragment. */
if (m->m_pkthdr.len > ifp->if_mtu) {
@@ -326,6 +332,8 @@ reroute:
m_freem(m);
goto out;
}
+
+   in6_proto_cksum_out(m, ifp);
 
error = ifp->if_output(ifp, m, sin6tosa(sin6), rt);
if (error) {



ix(4): LRO forwarding

2023-05-23 Thread Jan Klemkow
Hi,

This diff sets the needed offloading flags and the calculated mss on LRO
mbufs in ix(4).  Thus, we can forward these packets and process them via
tcp_if_output_tso().  This diff also uses tcp_if_output_tso() in
ip6_forward().

I tested the ip6_forward path via the address family transition in pf:

pass in inet from 192.168.1.1 to 192.168.13.2 af-to \
inet6 from fc00:13::1 to fc00:13::2

ok?

bye,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.196
diff -u -p -r1.196 if_ix.c
--- dev/pci/if_ix.c 23 May 2023 09:16:16 -  1.196
+++ dev/pci/if_ix.c 23 May 2023 11:02:52 -
@@ -3257,13 +3257,38 @@ ixgbe_rxeof(struct rx_ring *rxr)
 
if (sendmp->m_pkthdr.ph_mss > 0) {
struct ether_extracted ext;
+   uint64_t hlen;
uint16_t pkts = sendmp->m_pkthdr.ph_mss;
 
+   /* Calculate header size. */
ether_extract_headers(sendmp, &ext);
-   if (ext.tcp)
+   hlen = sizeof(*ext.eh);
+   if (ext.ip4) {
+   hlen += ext.ip4->ip_hl << 2;
+   } else if (ext.ip6) {
+   if (ext.ip6->ip6_nxt == IPPROTO_TCP)
+   hlen += sizeof(*ext.ip6);
+   else
+   tcpstat_inc(tcps_inbadlro);
+   }
+   if (ext.tcp) {
tcpstat_inc(tcps_inhwlro);
-   else
+   hlen += ext.tcp->th_off << 2;
+   } else {
tcpstat_inc(tcps_inbadlro);
+   }
+
+   /*
+* If we gonna forward this packet, we have to
+* mark it as TSO, recalculate the TCP checksum
+* and set a correct mss.
+*/
+   SET(sendmp->m_pkthdr.csum_flags,
+   M_TCP_CSUM_OUT | M_TCP_TSO);
+
+   sendmp->m_pkthdr.ph_mss =
+   (sendmp->m_pkthdr.len - hlen) / pkts;
+
tcpstat_add(tcps_inpktlro, pkts);
}
 
Index: netinet6/ip6_forward.c
===
RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v
retrieving revision 1.109
diff -u -p -r1.109 ip6_forward.c
--- netinet6/ip6_forward.c  5 Apr 2023 13:56:31 -   1.109
+++ netinet6/ip6_forward.c  23 May 2023 11:59:19 -
@@ -63,8 +63,10 @@
 #include 
 #include 
 #include 
-#include 
 #endif
+#include 
+#include 
+#include 
 
 /*
  * Forward a packet.  If some error occurs return the sender
@@ -316,7 +318,11 @@ reroute:
goto reroute;
}
 #endif
-   in6_proto_cksum_out(m, ifp);
+
+   error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6,
+   ifp->if_mtu);
+   if (error || m == NULL)
+   goto freecopy;
 
/* Check the size after pf_test to give pf a chance to refragment. */
if (m->m_pkthdr.len > ifp->if_mtu) {
@@ -326,6 +332,8 @@ reroute:
m_freem(m);
goto out;
}
+
+   in6_proto_cksum_out(m, ifp);
 
error = ifp->if_output(ifp, m, sin6tosa(sin6), rt);
if (error) {



Fix wrong interface mtu in tcp_mss

2023-05-19 Thread Jan Klemkow
Hi,

We use the wrong interface and mtu in tcp_mss() to calculate the mss if
the destination address is a local address.  In ip_output() we
use the correct interface and its mtu.

This limits the mss to 1448 if the mtu of the interface is 1500,
instead of using a local 32k mss.

The bigger issue is: local bulk traffic with the current TSO
implementation is broken.  tcp_output() creates TSO packets with an mss
smaller than 32k and ip_output() calls if_output instead of
tcp_if_output_tso() because it fits into the mtu check of lo0.

This diff uses the same logic to pick the interface in tcp_mss() as is
done in ip_output() and fixes both issues.

ok?

bye,
Jan

Index: netinet/tcp_input.c
===
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.387
diff -u -p -r1.387 tcp_input.c
--- netinet/tcp_input.c 14 Mar 2023 00:24:05 -  1.387
+++ netinet/tcp_input.c 19 May 2023 17:22:47 -
@@ -2805,7 +2805,11 @@ tcp_mss(struct tcpcb *tp, int offer)
if (rt == NULL)
goto out;
 
-   ifp = if_get(rt->rt_ifidx);
+   if (ISSET(rt->rt_flags, RTF_LOCAL))
+   ifp = if_get(rtable_loindex(inp->inp_rtableid));
+   else
+   ifp = if_get(rt->rt_ifidx);
+
if (ifp == NULL)
goto out;
 



Re: Add LRO counter in ix(4)

2023-05-18 Thread Jan Klemkow
On Thu, May 18, 2023 at 12:01:44AM +0200, Alexander Bluhm wrote:
> On Tue, May 16, 2023 at 09:11:48PM +0200, Jan Klemkow wrote:
> > @@ -412,6 +412,10 @@ tcp_stats(char *name)
> > p(tcps_outhwtso, "\t\t%u output TSO packet%s hardware processed\n");
> > p(tcps_outpkttso, "\t\t%u output TSO packet%s generated\n");
> > p(tcps_outbadtso, "\t\t%u output TSO packet%s dropped\n");
> > +   p(tcps_inhwlro, "\t\t%u input LRO generated packet%s from hardware\n");
> > +   p(tcps_inpktlro, "\t\t%u input LRO coalesced packet%s from hardware\n");
> 
> ... coalesced packet%s by hardware

done

> > +   p(tcps_inbadlro, "\t\t%u input bad LRO packet%s from hardware\n");
> > +
> 
> Move this down to the "packets received" section.  You included it
> in "packets sent".

done

> > +   /*
> > +* This function iterates over interleaved descriptors.
> > +* Thus, we reuse ph_mss as global segment counter per
> > +* TCP connection, insteat of introducing a new variable
> 
> s/insteat/instead/

done

ok?

Thanks,
Jan

diff --git a/sys/dev/pci/if_ix.c b/sys/dev/pci/if_ix.c
index 4119a2416dc..924a6d63236 100644
--- a/sys/dev/pci/if_ix.c
+++ b/sys/dev/pci/if_ix.c
@@ -3214,12 +3214,23 @@ ixgbe_rxeof(struct rx_ring *rxr)
sendmp = rxbuf->fmp;
rxbuf->buf = rxbuf->fmp = NULL;
 
-   if (sendmp != NULL) /* secondary frag */
+   if (sendmp != NULL) { /* secondary frag */
sendmp->m_pkthdr.len += mp->m_len;
-   else {
+
+   /*
+* This function iterates over interleaved descriptors.
+* Thus, we reuse ph_mss as global segment counter per
+* TCP connection, instead of introducing a new variable
+* in m_pkthdr.
+*/
+   if (rsccnt)
+   sendmp->m_pkthdr.ph_mss += rsccnt - 1;
+   } else {
/* first desc of a non-ps chain */
sendmp = mp;
sendmp->m_pkthdr.len = mp->m_len;
+   if (rsccnt)
+   sendmp->m_pkthdr.ph_mss = rsccnt - 1;
 #if NVLAN > 0
if (sc->vlan_stripping && staterr & IXGBE_RXD_STAT_VP) {
sendmp->m_pkthdr.ether_vtag = vtag;
@@ -3241,6 +3252,21 @@ ixgbe_rxeof(struct rx_ring *rxr)
SET(sendmp->m_pkthdr.csum_flags, M_FLOWID);
}
 
+   if (sendmp->m_pkthdr.ph_mss == 1)
+   sendmp->m_pkthdr.ph_mss = 0;
+
+   if (sendmp->m_pkthdr.ph_mss > 0) {
+   struct ether_extracted ext;
+   uint16_t pkts = sendmp->m_pkthdr.ph_mss;
+
+   ether_extract_headers(sendmp, &ext);
+   if (ext.tcp)
+   tcpstat_inc(tcps_inhwlro);
+   else
+   tcpstat_inc(tcps_inbadlro);
+   tcpstat_add(tcps_inpktlro, pkts);
+   }
+
ml_enqueue(&ml, sendmp);
}
 next_desc:
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 120e3cc5ea7..3970636cde1 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1340,6 +1340,9 @@ tcp_sysctl_tcpstat(void *oldp, size_t *oldlenp, void 
*newp)
ASSIGN(tcps_outhwtso);
ASSIGN(tcps_outpkttso);
ASSIGN(tcps_outbadtso);
+   ASSIGN(tcps_inhwlro);
+   ASSIGN(tcps_inpktlro);
+   ASSIGN(tcps_inbadlro);
 
 #undef ASSIGN
 
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 0a9630d719f..e706fedd0e7 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -447,6 +447,9 @@ struct  tcpstat {
u_int32_t tcps_outhwtso;/* output tso processed by hardware */
u_int32_t tcps_outpkttso;   /* packets generated by tso */
u_int32_t tcps_outbadtso;   /* output tso failed, packet dropped */
+   u_int32_t tcps_inhwlro; /* input lro from hardware */
+   u_int32_t tcps_inpktlro;/* packets coalessed by hardware lro */
+   u_int32_t tcps_inbadlro;/* input bad lro packets from hardware 
*/
 };
 
 /*
@@ -625,6 +628,9 @@ enum tcpstat_counters {
tcps_outhwtso,
tcps_outpkttso,
tcps_outbadtso,
+   tcps_inhwlro,
+   tcps_inpktlro,
+   tcps_inbadlro,
tcps_ncounters,
 };
 

Add LRO counter in ix(4)

2023-05-16 Thread Jan Klemkow
Hi,

This diff introduces new counters for LRO packets that we get from the
network interface.  They show how many packets the network interface has
coalesced into LRO packets.

In a follow-up diff, this packet counter will also be used to set the
ph_mss variable to a valid value, so that the stack is able to forward or
redirect this kind of packet.

ok?

bye,
Jan

Index: usr.bin/netstat/inet.c
===
RCS file: /cvs/src/usr.bin/netstat/inet.c,v
retrieving revision 1.175
diff -u -p -r1.175 inet.c
--- usr.bin/netstat/inet.c  10 May 2023 12:07:17 -  1.175
+++ usr.bin/netstat/inet.c  16 May 2023 17:55:20 -
@@ -412,6 +412,10 @@ tcp_stats(char *name)
p(tcps_outhwtso, "\t\t%u output TSO packet%s hardware processed\n");
p(tcps_outpkttso, "\t\t%u output TSO packet%s generated\n");
p(tcps_outbadtso, "\t\t%u output TSO packet%s dropped\n");
+   p(tcps_inhwlro, "\t\t%u input LRO generated packet%s from hardware\n");
+   p(tcps_inpktlro, "\t\t%u input LRO coalesced packet%s from hardware\n");
+   p(tcps_inbadlro, "\t\t%u input bad LRO packet%s from hardware\n");
+
p(tcps_rcvtotal, "\t%u packet%s received\n");
p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t%u ack%s (for %llu 
byte%s)\n");
p(tcps_rcvdupack, "\t\t%u duplicate ack%s\n");
Index: sys/dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.194
diff -u -p -r1.194 if_ix.c
--- sys/dev/pci/if_ix.c 16 May 2023 14:32:54 -  1.194
+++ sys/dev/pci/if_ix.c 16 May 2023 18:49:33 -
@@ -3175,12 +3175,23 @@ ixgbe_rxeof(struct rx_ring *rxr)
sendmp = rxbuf->fmp;
rxbuf->buf = rxbuf->fmp = NULL;
 
-   if (sendmp != NULL) /* secondary frag */
+   if (sendmp != NULL) { /* secondary frag */
sendmp->m_pkthdr.len += mp->m_len;
-   else {
+
+   /*
+* This function iterates over interleaved descriptors.
+* Thus, we reuse ph_mss as global segment counter per
+* TCP connection, insteat of introducing a new variable
+* in m_pkthdr.
+*/
+   if (rsccnt)
+   sendmp->m_pkthdr.ph_mss += rsccnt - 1;
+   } else {
/* first desc of a non-ps chain */
sendmp = mp;
sendmp->m_pkthdr.len = mp->m_len;
+   if (rsccnt)
+   sendmp->m_pkthdr.ph_mss = rsccnt - 1;
 #if NVLAN > 0
if (sc->vlan_stripping && staterr & IXGBE_RXD_STAT_VP) {
sendmp->m_pkthdr.ether_vtag = vtag;
@@ -3200,6 +3211,21 @@ ixgbe_rxeof(struct rx_ring *rxr)
if (hashtype != IXGBE_RXDADV_RSSTYPE_NONE) {
sendmp->m_pkthdr.ph_flowid = hash;
SET(sendmp->m_pkthdr.csum_flags, M_FLOWID);
+   }
+
+   if (sendmp->m_pkthdr.ph_mss == 1)
+   sendmp->m_pkthdr.ph_mss = 0;
+
+   if (sendmp->m_pkthdr.ph_mss > 0) {
+   struct ether_extracted ext;
+   uint16_t pkts = sendmp->m_pkthdr.ph_mss;
+
+   ether_extract_headers(sendmp, &ext);
+   if (ext.tcp)
+   tcpstat_inc(tcps_inhwlro);
+   else
+   tcpstat_inc(tcps_inbadlro);
+   tcpstat_add(tcps_inpktlro, pkts);
}
 
ml_enqueue(&ml, sendmp);
Index: sys/dev/pci/ixgbe.h
===
RCS file: /cvs/src/sys/dev/pci/ixgbe.h,v
retrieving revision 1.33
diff -u -p -r1.33 ixgbe.h
--- sys/dev/pci/ixgbe.h 8 Feb 2022 03:38:00 -   1.33
+++ sys/dev/pci/ixgbe.h 16 May 2023 17:55:20 -
@@ -60,12 +60,18 @@
 
 #include 
 #include 
+#include 
 #include 
 
+struct tdb;
+
 #include 
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #if NBPFILTER > 0
 #include 
Index: sys/netinet/tcp_usrreq.c
===
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.218
diff -u -p -r1.218 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c10 May 2023 12:07:16 -  1.218
+++ sys/netinet/tcp_usrreq.c16 May 2023 17:55:20 -
@@ -1340,6 +1340,9 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o
ASSIGN(tcps_outhwtso);
ASSIGN(tcps_outpkttso);
ASSIGN(tcps_outbadtso);
+   ASSIGN(tcps_inhwlro);
+   ASSIGN(tcps_inpktlro);
+   ASSIG

Re: seperate LRO/TSO flags

2023-05-15 Thread Jan Klemkow
On Mon, May 15, 2023 at 11:40:20AM +0200, Alexander Bluhm wrote:
> On Mon, May 15, 2023 at 09:34:21AM +0200, Jan Klemkow wrote:
> > @@ -251,12 +251,16 @@ struct if_status_description {
> >  #defineIFCAP_VLAN_HWTAGGING0x0020  /* hardware VLAN tag 
> > support */
> >  #defineIFCAP_CSUM_TCPv60x0080  /* can do IPv6/TCP 
> > checksums */
> >  #defineIFCAP_CSUM_UDPv60x0100  /* can do IPv6/UDP 
> > checksums */
> > -#defineIFCAP_TSO   0x4000  /* TCP segment 
> > offloading */
> > +#defineIFCAP_LRO   0x1000  /* TCP large recv 
> > offload */
> > +#defineIFCAP_TSOv4 0x2000  /* TCP segmentation 
> > offload */
> > +#defineIFCAP_TSOv6 0x4000  /* TCP segmentation 
> > offload */
> >  #defineIFCAP_WOL   0x8000  /* can do wake on lan */
> 
> I would prefer to keep the numbers of IFCAP_TSO/IFCAP_LRO as this
> is just a naming error.  Then we have less confusion during the
> ifconfig transition phase.
> 
> +#define IFCAP_TSOv4  0x1000
> +#define IFCAP_TSOv6  0x2000
> -#define IFCAP_TSO0x4000
> +#define IFCAP_LRO0x4000
> 
> > +#define IFCAP_TSO  (IFCAP_TSOv4 | IFCAP_TSOv6)
> > +
> 
> Could you please remove this chunk and expand it, where is used?
> This one more define does not make the code clearer.  And this flag
> IFCAP_TSO had a different meaning before renaming.  When it is not
> introduced again, the compiler makes sure that no renaming was
> forgotten.

done

Also:

 - updated the diff to the current source state
 - improved the vlan(4) capability handling

@dlg: What's your opinion about this diff?

ok?

Thanks,
Jan

Index: sbin/ifconfig/ifconfig.8
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.394
diff -u -p -r1.394 ifconfig.8
--- sbin/ifconfig/ifconfig.826 Apr 2023 02:38:08 -  1.394
+++ sbin/ifconfig/ifconfig.815 May 2023 18:46:48 -
@@ -282,8 +282,18 @@ tag.
 As CSUM_TCPv4, but supports IPv6 datagrams.
 .It Sy CSUM_UDPv6
 As above, for UDP.
-.It Sy TSO
-The device supports TCP segment offloading (TSO).
+.It Sy LRO
+The device supports TCP large receive offload (LRO).
+.It Sy TSOv4
+The device supports IPv4 TCP segmentation offload (TSO).
+TSO is used by default.
+Use the
+.Xr sysctl 8
+variable
+.Va net.inet.tcp.tso
+to disable this feature.
+.It Sy TSOv6
+As above, for IPv6.
 .It Sy WOL
 The device supports Wake on LAN (WoL).
 .It Sy hardmtu
@@ -491,25 +501,25 @@ Query and display information and diagno
 modules installed in an interface.
 It is only supported by drivers implementing the necessary functionality
 on hardware which supports it.
-.It Cm tso
-Enable TCP segmentation offloading (TSO) if it's supported by the hardware; see
+.It Cm tcprecvoffload
+Enable TCP large receive offload (LRO) if it's supported by the hardware; see
 .Cm hwfeatures .
-TSO enabled NICs modify received TCP/IP packets.
+LRO enabled network interfaces modify received TCP/IP packets.
 This will also affect traffic of upper layer interfaces,
 such as
 .Xr vlan 4 ,
 .Xr aggr 4 ,
 and
 .Xr carp 4 .
-It is not possible to use TSO with interfaces attached to a
+It is not possible to use LRO with interfaces attached to a
 .Xr bridge 4 ,
 .Xr veb 4 ,
 or
 .Xr tpmr 4 .
 Changing this option will re-initialize the network interface.
-.It Cm -tso
-Disable TSO.
-TSO is disabled by default.
+.It Cm -tcprecvoffload
+Disable LRO.
+LRO is disabled by default.
 .It Cm up
 Mark an interface
 .Dq up .
Index: sbin/ifconfig/ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.463
diff -u -p -r1.463 ifconfig.c
--- sbin/ifconfig/ifconfig.c12 May 2023 18:24:13 -  1.463
+++ sbin/ifconfig/ifconfig.c15 May 2023 20:27:51 -
@@ -126,7 +126,7 @@
 #define HWFEATURESBITS \
"\024\1CSUM_IPv4\2CSUM_TCPv4\3CSUM_UDPv4"   \
"\5VLAN_MTU\6VLAN_HWTAGGING\10CSUM_TCPv6"   \
-   "\11CSUM_UDPv6\17TSO\20WOL"
+   "\11CSUM_UDPv6\15TSOv4\16TSOv6\17LSO\20WOL"
 
 struct ifencap {
unsigned int ife_flags;
@@ -469,8 +469,8 @@ const structcmd {
{ "-soii",  IFXF_INET6_NOSOII,  0,  setifxflags },
{ "monitor",IFXF_MONITOR,   0,  setifxflags },
{ "-monitor",   -IFXF_MONITOR,  0,  setifxflags },
-   { "tso",IFXF_TSO,   0,  setifxflags },
-   { "-tso",   -IFXF_TSO,  0,  setifxf

Re: seperate LRO/TSO flags

2023-05-15 Thread Jan Klemkow
On Sat, May 13, 2023 at 04:44:18PM +0200, Christian Weisgerber wrote:
> Jan Klemkow:
> 
> > This diff introduces separate flags for TCP offloading.  We split this
> > into LRO (large receive offloading) and TSO (TCP segmentation
> > offloading).  Thus, we are able to turn it on/off separately.
> 
> Wait, why do we even have a knob for TSO?
> 
> We specifically decided not to have a knob for checksum offloading,
> because it should just work out of the box, and if it doesn't, then
> it should be disabled by the driver.  It should not be the admin's
> task to figure out if the implementation is broken and to fiddle
> with the knobs (hi, FreeBSD!).
> 
> I would assume that line of thinking extends to TSO.

You are right.  This is reflected in the current state of the diff
below.

We just need a knob for TCP Large Receive Offload (LRO) because it
changes the TCP segments.  You may want to avoid this on a forwarding
router.

ok?

Thanks,
Jan

Index: sbin/ifconfig/ifconfig.8
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.394
diff -u -p -r1.394 ifconfig.8
--- sbin/ifconfig/ifconfig.826 Apr 2023 02:38:08 -  1.394
+++ sbin/ifconfig/ifconfig.812 May 2023 06:22:35 -
@@ -282,8 +282,18 @@ tag.
 As CSUM_TCPv4, but supports IPv6 datagrams.
 .It Sy CSUM_UDPv6
 As above, for UDP.
-.It Sy TSO
-The device supports TCP segment offloading (TSO).
+.It Sy LRO
+The device supports TCP large receive offload (LRO).
+.It Sy TSOv4
+The device supports IPv4 TCP segmentation offload (TSO).
+TSO is used by default.
+Use the
+.Xr sysctl 8
+variable
+.Va net.inet.tcp.tso
+to disable this feature.
+.It Sy TSOv6
+As above, for IPv6.
 .It Sy WOL
 The device supports Wake on LAN (WoL).
 .It Sy hardmtu
@@ -491,25 +501,25 @@ Query and display information and diagno
 modules installed in an interface.
 It is only supported by drivers implementing the necessary functionality
 on hardware which supports it.
-.It Cm tso
-Enable TCP segmentation offloading (TSO) if it's supported by the hardware; see
+.It Cm tcprecvoffload
+Enable TCP large receive offload (LRO) if it's supported by the hardware; see
 .Cm hwfeatures .
-TSO enabled NICs modify received TCP/IP packets.
+LRO enabled network interfaces modify received TCP/IP packets.
 This will also affect traffic of upper layer interfaces,
 such as
 .Xr vlan 4 ,
 .Xr aggr 4 ,
 and
 .Xr carp 4 .
-It is not possible to use TSO with interfaces attached to a
+It is not possible to use LRO with interfaces attached to a
 .Xr bridge 4 ,
 .Xr veb 4 ,
 or
 .Xr tpmr 4 .
 Changing this option will re-initialize the network interface.
-.It Cm -tso
-Disable TSO.
-TSO is disabled by default.
+.It Cm -tcprecvoffload
+Disable LRO.
+LRO is disabled by default.
 .It Cm up
 Mark an interface
 .Dq up .
Index: sbin/ifconfig/ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.462
diff -u -p -r1.462 ifconfig.c
--- sbin/ifconfig/ifconfig.c8 Mar 2023 04:43:06 -   1.462
+++ sbin/ifconfig/ifconfig.c11 May 2023 17:33:55 -
@@ -126,7 +126,7 @@
 #define HWFEATURESBITS \
"\024\1CSUM_IPv4\2CSUM_TCPv4\3CSUM_UDPv4"   \
"\5VLAN_MTU\6VLAN_HWTAGGING\10CSUM_TCPv6"   \
-   "\11CSUM_UDPv6\17TSO\20WOL"
+   "\11CSUM_UDPv6\15LRO\16TSOv4\17TSOv6\20WOL"
 
 struct ifencap {
unsigned int ife_flags;
@@ -469,8 +469,8 @@ const structcmd {
{ "-soii",  IFXF_INET6_NOSOII,  0,  setifxflags },
{ "monitor",IFXF_MONITOR,   0,  setifxflags },
{ "-monitor",   -IFXF_MONITOR,  0,  setifxflags },
-   { "tso",IFXF_TSO,   0,  setifxflags },
-   { "-tso",   -IFXF_TSO,  0,  setifxflags },
+   { "tcprecvoffload", IFXF_LRO,   0,  setifxflags },
+   { "-tcprecvoffload", -IFXF_LRO, 0,  setifxflags },
 #ifndef SMALL
{ "hwfeatures", NEXTARG0,   0,  printifhwfeatures },
{ "metric", NEXTARG,0,  setifmetric },
@@ -674,7 +674,7 @@ const structcmd {
"\7RUNNING\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX"\
"\15LINK0\16LINK1\17LINK2\20MULTICAST"  \
"\23AUTOCONF6TEMP\24MPLS\25WOL\26AUTOCONF6\27INET6_NOSOII"  \
-   "\30AUTOCONF4" "\31MONITOR" "\32TSO"
+   "\30AUTOCONF4" "\31MONITOR" "\32LRO"
 
 intgetinfo(struct ifreq *, int);
 void   getsock(int);
Index: 

Re: ifconfig: SIOCSIFFLAGS: device not configured

2023-05-12 Thread Jan Klemkow
On Thu, May 11, 2023 at 09:17:37PM +0200, Hrvoje Popovski wrote:
> is it possible to change "ifconfig: SIOCSIFFLAGS: device not configured"
> message that it has an interface name in it, something like:
> ifconfig pfsync0: SIOCSIFFLAGS: device not configured <- in my case.
> 
> I have many vlans and static routes in my setup and while testing some
> diffs, it took me a long time to figure out which interface the message
> was coming from.
> 
> starting network
> add host 10.11.2.69: gateway 10.12.253.225
> add host 10.250.184.36: gateway 10.12.253.225
> add host 9.9.9.9: gateway 10.12.253.225
> add host 10.11.1.234: gateway 10.12.253.225
> add host 10.11.1.235: gateway 10.12.253.225
> add host 10.11.255.123: gateway 10.12.253.225
> add net 10.101/16: gateway 10.12.253.225
> ifconfig: SIOCSIFFLAGS: Device not configured
> add net 16/8: gateway 192.168.100.112
> add net a192:a168:a100:a100::/64: gateway 192:168:1000:1000::112
> add net 48/8: gateway 192.168.111.112
> add net a192:a168:a111:a111::/64: gateway 192:168::::112
> reordering: ld.so libc libcrypto sshd.
> 
> or when I'm doing sh /etc/netstart and have aggr interface
> 
> ifconfig: SIOCSTRUNKPORT: Device busy
> ifconfig: SIOCSTRUNKPORT: Device busy
> 
> to change
> ifconfig ix0: SIOCSTRUNKPORT: Device busy
> ifconfig ix1: SIOCSTRUNKPORT: Device busy

I also run into this issue sometimes.  So, here is a diff that prints the
interface name in front of most of these anonymous error messages.

ok?

Jan

Index: ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.462
diff -u -p -r1.462 ifconfig.c
--- ifconfig.c  8 Mar 2023 04:43:06 -   1.462
+++ ifconfig.c  12 May 2023 14:14:01 -
@@ -1070,14 +1070,14 @@ printgroup(char *groupname, int ifaliase
errno == ENOENT)
return (-1);
else
-   err(1, "SIOCGIFGMEMB");
+   err(1, "%s: SIOCGIFGMEMB", ifgr.ifgr_name);
}
 
len = ifgr.ifgr_len;
if ((ifgr.ifgr_groups = calloc(1, len)) == NULL)
err(1, "printgroup");
if (ioctl(sock, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1)
-   err(1, "SIOCGIFGMEMB");
+   err(1, "%s: SIOCGIFGMEMB", ifgr.ifgr_name);
 
for (ifg = ifgr.ifgr_groups; ifg && len >= sizeof(struct ifg_req);
ifg++) {
@@ -1099,7 +1099,7 @@ printgroupattribs(char *groupname)
bzero(&ifgr, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, groupname, sizeof(ifgr.ifgr_name));
if (ioctl(sock, SIOCGIFGATTR, (caddr_t)&ifgr) == -1)
-   err(1, "SIOCGIFGATTR");
+   err(1, "%s: SIOCGIFGATTR", ifgr.ifgr_name);
 
printf("%s:", groupname);
printf(" carp demote count %d", ifgr.ifgr_attrib.ifg_carp_demoted);
@@ -1122,7 +1122,8 @@ setgroupattribs(char *groupname, int arg
if (argc > 1) {
neg = strtonum(argv[1], 0, 128, &errstr);
if (errstr)
-   errx(1, "invalid carp demotion: %s", errstr);
+   errx(1, "%s: invalid carp demotion: %s", ifgr.ifgr_name,
+   errstr);
}
 
if (p[0] == '-') {
@@ -1135,7 +1136,7 @@ setgroupattribs(char *groupname, int arg
usage();
 
if (ioctl(sock, SIOCSIFGATTR, (caddr_t)&ifgr) == -1)
-   err(1, "SIOCSIFGATTR");
+   err(1, "%s: SIOCSIFGATTR", ifgr.ifgr_name);
 }
 
 void
@@ -1249,7 +1250,7 @@ clone_create(const char *addr, int param
 
(void) strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
if (ioctl(sock, SIOCIFCREATE, &ifr) == -1)
-   err(1, "SIOCIFCREATE");
+   err(1, "%s: SIOCIFCREATE", ifr.ifr_name);
 }
 
 void
@@ -1258,7 +1259,7 @@ clone_destroy(const char *addr, int para
 
(void) strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
if (ioctl(sock, SIOCIFDESTROY, &ifr) == -1)
-   err(1, "SIOCIFDESTROY");
+   err(1, "%s: SIOCIFDESTROY", ifr.ifr_name);
 }
 
 struct if_clonereq *
@@ -1422,7 +1423,7 @@ setifflags(const char *vname, int value)
bcopy((char *)&ifr, (char *)&my_ifr, sizeof(struct ifreq));
 
if (ioctl(sock, SIOCGIFFLAGS, (caddr_t)&my_ifr) == -1)
-   err(1, "SIOCGIFFLAGS");
+   err(1, "%s: SIOCGIFFLAGS", my_ifr.ifr_name);
(void) strlcpy(my_ifr.ifr_name, ifname, sizeof(my_ifr.ifr_name));
flags = my_ifr.ifr_flags;
 
@@ -1433,7 +1434,7 @@ setifflags(const char *vname, int value)
flags |= value;
my_ifr.ifr_flags = flags;
if (ioctl(sock, SIOCSIFFLAGS, (caddr_t)&my_ifr) == -1)
-   err(1, "SIOCSIFFLAGS");
+   err(1, "%s: SIOCSIFFLAGS", my_ifr.ifr_name);
 }
 
 void
@@ -1444,7 +1445,7 @@ setifxflags(const char *vname, int value
bcopy((char *)&ifr, (char *)&my_ifr, sizeof(struct ifreq));
 
if (io

Re: seperate LRO/TSO flags

2023-05-10 Thread Jan Klemkow
On Wed, May 10, 2023 at 11:13:04AM -0600, Todd C. Miller wrote:
> On Wed, 10 May 2023 19:03:58 +0200, Jan Klemkow wrote:
> > This diff introduces separate flags for TCP offloading.  We split this
> > into LRO (large receive offloading) and TSO (TCP segmentation
> > offloading).  Thus, we are able to turn it on/off separately.
> >
> > For ifconfig(8) we use "tcprecvoffload" and "tcpsendoffload".  So, the
> > user has a better insight of what this features are doing.
> 
> Is it possible to control these at the address family level?  In
> other words, is it possible to enable "tcprecvoffload" and
> "tcpsendoffload" for inet but not inet6 or vice versa?

For tcprecvoffload and ix(4) it's not possible to enable/disable it per
address family.  It's just one flag for the hardware.

For tcpsendoffload it's possible, but I won't do that until it's necessary.

Why would you want to differentiate the address families here?

bye,
Jan



seperate LRO/TSO flags

2023-05-10 Thread Jan Klemkow
Hi,

This diff introduces separate flags for TCP offloading.  We split this
into LRO (large receive offloading) and TSO (TCP segmentation
offloading).  Thus, we are able to turn it on/off separately.

For ifconfig(8) we use "tcprecvoffload" and "tcpsendoffload".  So, the
user has a better insight into what these features are doing.

ok?

bye,
Jan

Index: sbin/ifconfig/ifconfig.8
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.394
diff -u -p -r1.394 ifconfig.8
--- sbin/ifconfig/ifconfig.826 Apr 2023 02:38:08 -  1.394
+++ sbin/ifconfig/ifconfig.810 May 2023 16:22:30 -
@@ -282,8 +282,10 @@ tag.
 As CSUM_TCPv4, but supports IPv6 datagrams.
 .It Sy CSUM_UDPv6
 As above, for UDP.
+.It Sy LRO
+The device supports TCP large receive offloading (LRO).
 .It Sy TSO
-The device supports TCP segment offloading (TSO).
+The device supports TCP segmentation offloading (TSO).
 .It Sy WOL
 The device supports Wake on LAN (WoL).
 .It Sy hardmtu
@@ -491,10 +493,30 @@ Query and display information and diagno
 modules installed in an interface.
 It is only supported by drivers implementing the necessary functionality
 on hardware which supports it.
-.It Cm tso
+.It Cm tcprecvoffload
+Enable TCP large receive offloading (LRO) if it's supported by the hardware; see
+.Cm hwfeatures .
+LRO enabled network interfaces modify received TCP/IP packets.
+This will also affect traffic of upper layer interfaces,
+such as
+.Xr vlan 4 ,
+.Xr aggr 4 ,
+and
+.Xr carp 4 .
+It is not possible to use LRO with interfaces attached to a
+.Xr bridge 4 ,
+.Xr veb 4 ,
+or
+.Xr tpmr 4 .
+Changing this option will re-initialize the network interface.
+.It Cm -tcprecvoffload
+Disable LRO.
+LRO is disabled by default.
+.It Cm tcpsendoffload
 Enable TCP segmentation offloading (TSO) if it's supported by the hardware; see
 .Cm hwfeatures .
-TSO enabled NICs modify received TCP/IP packets.
+TSO enabled network interfaces are able to split large TCP segments into smaller
+pieces that fit into MTU and MSS.
 This will also affect traffic of upper layer interfaces,
 such as
 .Xr vlan 4 ,
@@ -506,8 +528,7 @@ It is not possible to use TSO with inter
 .Xr veb 4 ,
 or
 .Xr tpmr 4 .
-Changing this option will re-initialize the network interface.
-.It Cm -tso
+.It Cm -tcpsendoffload
 Disable TSO.
 TSO is disabled by default.
 .It Cm up
Index: sbin/ifconfig/ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.462
diff -u -p -r1.462 ifconfig.c
--- sbin/ifconfig/ifconfig.c8 Mar 2023 04:43:06 -   1.462
+++ sbin/ifconfig/ifconfig.c10 May 2023 15:40:17 -
@@ -126,7 +126,7 @@
 #define HWFEATURESBITS \
"\024\1CSUM_IPv4\2CSUM_TCPv4\3CSUM_UDPv4"   \
"\5VLAN_MTU\6VLAN_HWTAGGING\10CSUM_TCPv6"   \
-   "\11CSUM_UDPv6\17TSO\20WOL"
+   "\11CSUM_UDPv6\16LRO\17TSO\20WOL"
 
 struct ifencap {
unsigned int ife_flags;
@@ -469,8 +469,10 @@ const struct   cmd {
{ "-soii",  IFXF_INET6_NOSOII,  0,  setifxflags },
{ "monitor",IFXF_MONITOR,   0,  setifxflags },
{ "-monitor",   -IFXF_MONITOR,  0,  setifxflags },
-   { "tso",IFXF_TSO,   0,  setifxflags },
-   { "-tso",   -IFXF_TSO,  0,  setifxflags },
+   { "tcprecvoffload", IFXF_LRO,   0,  setifxflags },
+   { "-tcprecvoffload", -IFXF_LRO, 0,  setifxflags },
+   { "tcpsendoffload", IFXF_TSO,   0,  setifxflags },
+   { "-tcpsendoffload", -IFXF_TSO, 0,  setifxflags },
 #ifndef SMALL
{ "hwfeatures", NEXTARG0,   0,  printifhwfeatures },
{ "metric", NEXTARG,0,  setifmetric },
@@ -674,7 +676,7 @@ const structcmd {
"\7RUNNING\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX"\
"\15LINK0\16LINK1\17LINK2\20MULTICAST"  \
"\23AUTOCONF6TEMP\24MPLS\25WOL\26AUTOCONF6\27INET6_NOSOII"  \
-   "\30AUTOCONF4" "\31MONITOR" "\32TSO"
+   "\30AUTOCONF4" "\31MONITOR" "\32LRO" "\33TSO"
 
 intgetinfo(struct ifreq *, int);
 void   getsock(int);
Index: sys/dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.193
diff -u -p -r1.193 if_ix.c
--- sys/dev/pci/if_ix.c 28 Apr 2023 10:18:57 -  1.193
+++ sys/dev/pci/if_ix.c 10 May 2023 16:32:44 -
@@ -1925,7 +1925,7 @@ ixgbe_setup_interface(struct ix_softc *s
ifp->if_capabilities |= IFCAP_CSUM_IPv4;
 
if (sc->hw.mac.type != ixgbe_mac_82598EB)
-   ifp->if_capabilities |= IFCAP_TSO;
+   ifp->if_capabilities |= IFCAP_LRO;
 
/*
 * Sp

Re: software tcp send offloading

2023-05-09 Thread Jan Klemkow
On Tue, May 09, 2023 at 09:56:36AM +0200, Alexander Bluhm wrote:
> On Sun, May 07, 2023 at 09:00:31PM +0200, Alexander Bluhm wrote:
> > Not sure if I addressed all corner cases already.  I think IPsec
> > is missing.
> 
> Updated diff:
> - parts have been commited
> - works with IPsec now

Thanks for this solution.  Looks much better to me than an IPsec lookup
in tcp_output(), as it's done in FreeBSD.

> - some bugs fixed
> - sysctl net.inet.tcp.tso
> - netstat TSO counter
> 
> If you test this, recompile sysctl and netstat with new kernel
> headers.  Then you can see, whether the diff has an effect on your
> setup.
> 
> # netstat -s -p tcp | grep TSO
> 79 output TSO packets software chopped
> 0 output TSO packets hardware processed
> 840 output TSO packets generated
> 0 output TSO packets dropped

Good idea.

> If you run into problems, disable the feature, and report if the
> problem goes away.  This helps to locate the bug.
> 
> # sysctl net.inet.tcp.tso=0
> net.inet.tcp.tso: 1 -> 0
> 
> I would like to keep the sysctl for now.  It makes performance
> comparison easier.  When we add hardware TSO it can be a quick
> workaround for driver problems.
> 
> When this has been tested a bit, I think it is ready for commit.
> Remaining issues can be handled in tree.  My tests pass, I am not
> aware of TCP problems.

I also did some testing in my setups.  Everything works.

> ok?

Diff looks fine to me, too.

ok jan@

> bluhm
> 
> Index: sys/net/pf.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> retrieving revision 1.1177
> diff -u -p -r1.1177 pf.c
> --- sys/net/pf.c  8 May 2023 13:22:13 -   1.1177
> +++ sys/net/pf.c  8 May 2023 22:37:04 -
> @@ -6561,6 +6561,16 @@ pf_route(struct pf_pdesc *pd, struct pf_
>   goto done;
>   }
>  
> + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) &&
> + m0->m_pkthdr.ph_mss <= ifp->if_mtu) {
> + if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) ||
> + if_output_ml(ifp, &ml, sintosa(dst), rt))
> + goto done;
> + tcpstat_inc(tcps_outswtso);
> + goto done;
> + }
> + CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO);
> +
>   /*
>* Too large for interface; fragment if possible.
>* Must be able to put at least 8 bytes per fragment.
> @@ -6594,6 +6604,7 @@ void
>  pf_route6(struct pf_pdesc *pd, struct pf_state *st)
>  {
>   struct mbuf *m0;
> + struct mbuf_list ml;
>   struct sockaddr_in6 *dst, sin6;
>   struct rtentry  *rt = NULL;
>   struct ip6_hdr  *ip6;
> @@ -6685,11 +6696,21 @@ pf_route6(struct pf_pdesc *pd, struct pf
>   goto done;
>   }
>  
> - if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
> + if (m0->m_pkthdr.len <= ifp->if_mtu) {
>   in6_proto_cksum_out(m0, ifp);
>   ifp->if_output(ifp, m0, sin6tosa(dst), rt);
>   goto done;
>   }
> +
> + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) &&
> + m0->m_pkthdr.ph_mss <= ifp->if_mtu) {
> + if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) ||
> + if_output_ml(ifp, &ml, sin6tosa(dst), rt))
> + goto done;
> + tcpstat_inc(tcps_outswtso);
> + goto done;
> + }
> + CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO);
>  
>   ip6stat_inc(ip6s_cantfrag);
>   if (st->rt != PF_DUPTO)
> Index: sys/netinet/in.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.h,v
> retrieving revision 1.142
> diff -u -p -r1.142 in.h
> --- sys/netinet/in.h  11 Apr 2023 00:45:09 -  1.142
> +++ sys/netinet/in.h  8 May 2023 13:47:48 -
> @@ -780,6 +780,7 @@ int  in_canforward(struct in_addr);
>  int in_cksum(struct mbuf *, int);
>  int in4_cksum(struct mbuf *, u_int8_t, int, int);
>  voidin_proto_cksum_out(struct mbuf *, struct ifnet *);
> +int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
>  voidin_ifdetach(struct ifnet *);
>  int in_mask2len(struct in_addr *);
>  voidin_len2mask(struct in_addr *, int);
> Index: sys/netinet/ip_output.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.384
> diff -u -p -r1.384 ip_output.c
> --- sys/netinet/ip_output.c   8 May 2023 13:22:13 -   1.384
> +++ sys/netinet/ip_output.c   8 May 2023 22:37:04 -
> @@ -84,7 +84,6 @@ void ip_mloopback(struct ifnet *, struct
>  static __inline u_int16_t __attribute__((__unused__))
>  in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t);
>  void in_delayed_cksum(struct mbuf *);
> -int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
>  
>  int ip_output_ipsec_looku

Re: em(4) multiqueue

2023-04-25 Thread Jan Klemkow
On Fri, Apr 14, 2023 at 10:26:14AM +0800, Kevin Lo wrote:
> On Thu, Apr 13, 2023 at 01:30:36PM -0500, Brian Conway wrote:
> > Reviving this thread, apologies for discontinuity in mail readers: 
> > https://marc.info/?t=16564219358
> > 
> > After rebasing on 7.3, my results have mirrored Hrvoje's testing at
> > the end of that thread. No issues with throughput, unusual latency,
> > or reliability. `vmstat -i` shows some level of balancing between
> > the queues. I've been testing on as many em(4) systems as I have
> > access to, some manually, some in a packet forwarder/firewall
> > scenarios:
> 
> Last time I tested (about a year go) on I211, rx locked up if I tried 
> something
> like iperf3 or tcpbench.  Don't know if you have a similar problem.

I rebased the rest to current and tested it with tcpbench between the
following interfaces:

em0 at pci7 dev 0 function 0 "Intel 82580" rev 0x01, msix, 4 queues, address 90:e2:ba:df:d5:2c
em0 at pci5 dev 0 function 0 "Intel I350" rev 0x01, msix, 8 queues, address 00:25:90:eb:b3:c2

After a second the connection got stuck.  As far as I can see, the
sending side has a problem.

ot45# tcpbench 192.168.99.3
  elapsed_ms  bytes mbps   bwidth
1012   14574120  115.210  100.00%
Conn:   1 Mbps:  115.210 Peak Mbps:  115.210 Avg Mbps:  115.210
2022  00.000-nan%
...

ot46# tcpbench -s
  elapsed_ms  bytes mbps   bwidth
1017   14313480  112.594  100.00%
Conn:   1 Mbps:  112.594 Peak Mbps:  112.594 Avg Mbps:  112.594
2027  00.000-nan%
...

ot45# netstat  -nf inet -p tcp
Active Internet connections
Proto   Recv-Q Send-Q  Local Address  Foreign AddressTCP-State
tcp  0 260640  192.168.99.1.18530 192.168.99.3.12345 CLOSING

When I retried it, it sometimes worked, but most times it did not.

kstat tells me that transmit queues 1 to 3 are oactive and only queue 0
works:

em0:0:txq:0
 packets: 4042648 packets
   bytes: 5310138322 bytes
  qdrops: 9 packets
  errors: 0 packets
qlen: 0 packets
 maxqlen: 511 packets
 oactive: false
em0:0:txq:1
 packets: 9812 packets
   bytes: 14846716 bytes
  qdrops: 0 packets
  errors: 0 packets
qlen: 184 packets
 maxqlen: 511 packets
 oactive: true
em0:0:txq:2
 packets: 690362 packets
   bytes: 60011484 bytes
  qdrops: 0 packets
  errors: 0 packets
qlen: 185 packets
 maxqlen: 511 packets
 oactive: true
em0:0:txq:3
 packets: 443181 packets
   bytes: 43829886 bytes
  qdrops: 0 packets
  errors: 0 packets
qlen: 198 packets
 maxqlen: 511 packets
 oactive: true

This is the rebased diff on current I tested:

Index: dev/pci/files.pci
===
RCS file: /cvs/src/sys/dev/pci/files.pci,v
retrieving revision 1.361
diff -u -p -r1.361 files.pci
--- dev/pci/files.pci   23 Apr 2023 00:20:26 -  1.361
+++ dev/pci/files.pci   25 Apr 2023 11:25:47 -
@@ -334,7 +334,7 @@ attach  fxp at pci with fxp_pci
 file   dev/pci/if_fxp_pci.cfxp_pci
 
 # Intel Pro/1000
-device em: ether, ifnet, ifmedia
+device em: ether, ifnet, ifmedia, intrmap, stoeplitz
 attach em at pci
 file   dev/pci/if_em.c em
 file   dev/pci/if_em_hw.c  em
Index: dev/pci/if_em.c
===
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.365
diff -u -p -r1.365 if_em.c
--- dev/pci/if_em.c 9 Feb 2023 21:21:27 -   1.365
+++ dev/pci/if_em.c 25 Apr 2023 11:25:47 -
@@ -247,6 +247,7 @@ int  em_intr(void *);
 int  em_allocate_legacy(struct em_softc *);
 void em_start(struct ifqueue *);
 int  em_ioctl(struct ifnet *, u_long, caddr_t);
+int  em_rxrinfo(struct em_softc *, struct if_rxrinfo *);
 void em_watchdog(struct ifnet *);
 void em_init(void *);
 void em_stop(void *, int);
@@ -309,8 +310,10 @@ int  em_setup_queues_msix(struct em_soft
 int  em_queue_intr_msix(void *);
 int  em_link_intr_msix(void *);
 void em_enable_queue_intr_msix(struct em_queue *);
+void em_setup_rss(struct em_softc *);
 #else
 #define em_allocate_msix(_sc)  (-1)
+#define em_setup_rss(_sc)  0
 #endif
 
 #if NKSTAT > 0
@@ -333,7 +336,6 @@ struct cfdriver em_cd = {
 };
 
 static int em_smart_pwr_down = FALSE;
-int em_enable_msix = 0;
 
 /*
  *  Device identification routine
@@ -629,12 +631,12 @@ err_pci:
 void
 em_start(struct ifqueue *ifq)
 {
+   struct em_queue *que = ifq->ifq_softc;
struct ifnet *ifp = ifq->ifq_if;
struct em_softc *sc = ifp->if_softc;
u_int head, free, used;
struct mbuf *m;
int post = 0;
-   struct em_que

Re: libcrypto: Fix EINVAL in openssl/tls_init

2023-03-27 Thread Jan Klemkow
On Fri, Mar 24, 2023 at 10:02:05PM +0100, Theo Buehler wrote:
> > Thus, I would suggest to set this constant to ELAST.  So, we will avoid
> > useless unknown error strings and a non-zero errno after tls_init().
> 
> ELAST isn't portable. It's under __BSD_VISIBLE in sys/errno.h.
> 
> It would seem better to use the save_errno idiom to store the errno
> at the start of the loop and restore it at the end.
> 
> And yes, we should fix this, after unluck.

ok?

Thanks,
Jan

Index: err/err.c
===
RCS file: /cvs/src/lib/libcrypto/err/err.c,v
retrieving revision 1.50
diff -u -p -r1.50 err.c
--- err/err.c   26 Dec 2022 07:18:52 -  1.50
+++ err/err.c   27 Mar 2023 07:58:25 -
@@ -580,6 +580,7 @@ build_SYS_str_reasons(void)
static char strerror_tab[NUM_SYS_STR_REASONS][LEN_SYS_STR_REASON];
int i;
static int init = 1;
+   int save_errno = errno;
 
CRYPTO_r_lock(CRYPTO_LOCK_ERR);
if (!init) {
@@ -594,6 +595,8 @@ build_SYS_str_reasons(void)
return;
}
 
+   /* strerror(3) will set errno to EINVAL when i is an unknown error. */
+   save_errno = errno;
for (i = 1; i <= NUM_SYS_STR_REASONS; i++) {
ERR_STRING_DATA *str = &SYS_str_reasons[i - 1];
 
@@ -610,6 +613,7 @@ build_SYS_str_reasons(void)
if (str->string == NULL)
str->string = "unknown";
}
+   errno = save_errno;
 
/* Now we still have SYS_str_reasons[NUM_SYS_STR_REASONS] = {0, NULL},
 * as required by ERR_load_strings. */



libcrypto: Fix EINVAL in openssl/tls_init

2023-03-24 Thread Jan Klemkow
Hi,

after tls_init() and OPENSSL_init_ssl() errno is always set to EINVAL.
This is caused by a routine that tries to prefetch all error strings
up to 127 from strerror(3).  But, strerror(3) sets EINVAL for unknown
values of error.

Thus, I would suggest to set this constant to ELAST.  So, we will avoid
useless unknown error strings and a non-zero errno after tls_init().

I guess this is not serious enough for the current release.  But, we
might fix this after unlocking of the tree?

ok?

bye,
Jan

Index: lib/libcrypto//err/err.c
===
RCS file: /cvs/src/lib/libcrypto/err/err.c,v
retrieving revision 1.50
diff -u -p -r1.50 err.c
--- lib/libcrypto//err/err.c26 Dec 2022 07:18:52 -  1.50
+++ lib/libcrypto//err/err.c24 Mar 2023 20:07:18 -
@@ -560,7 +560,7 @@ int_err_get_next_lib(void)
 
 
 #ifndef OPENSSL_NO_ERR
-#define NUM_SYS_STR_REASONS 127
+#define NUM_SYS_STR_REASONS ELAST
 #define LEN_SYS_STR_REASON 32
 
 static ERR_STRING_DATA SYS_str_reasons[NUM_SYS_STR_REASONS + 1];



Re: refactor mbuf parsing on driver level

2023-02-06 Thread Jan Klemkow
On Mon, Feb 06, 2023 at 09:47:57PM +0100, Christian Weisgerber wrote:
> Christian Weisgerber:
> 
> > I also switched over em(4) to this and have successfully used it
> > for a full 30-hour package build on the four amd64 ports machines
> > with their I350 interfaces.  Additionally, I've done some IPv6
> > testing at home over an I210.
> 
> ok for this?

I tested it with I350.  Diff looks fine.

ok jan@

> igc(4) has very similar code, but I don't have access to a machine
> with that hardware.

Send me an ssh-key and I'll give you access to this machine:
http://obsd-lab.genua.de/hw/ot34.html

Thanks,
Jan

> > diff f8646d27d4041e5f595c04e17a876f12600deea7 
> > f3f95d0cc0957a2f1e961cace4c3c9dd869e8c9e
> > commit - f8646d27d4041e5f595c04e17a876f12600deea7
> > commit + f3f95d0cc0957a2f1e961cace4c3c9dd869e8c9e
> > blob - c840377f0a3f1ef3c3e3072657698d8085ffd3a0
> > blob + 523ed5b0a18718c50bb30e2995d293fa1d2199a6
> > --- sys/dev/pci/if_em.c
> > +++ sys/dev/pci/if_em.c
> > @@ -2398,12 +2398,11 @@ em_tx_ctx_setup(struct em_queue *que, struct mbuf 
> > *mp,
> >  em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head,
> >  u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
> >  {
> > +   struct ether_extracted ext;
> > struct e1000_adv_tx_context_desc *TD;
> > -   struct ether_header *eh = mtod(mp, struct ether_header *);
> > -   struct mbuf *m;
> > uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
> > -   int off = 0, hoff;
> > -   uint8_t ipproto, iphlen;
> > +   int off = 0;
> > +   uint8_t iphlen;
> >  
> > *olinfo_status = 0;
> > *cmd_type_len = 0;
> > @@ -2418,44 +2417,26 @@ em_tx_ctx_setup(struct em_queue *que, struct mbuf 
> > *mp,
> > }
> >  #endif
> >  
> > -   vlan_macip_lens |= (sizeof(*eh) << E1000_ADVTXD_MACLEN_SHIFT);
> > -   
> > -   switch (ntohs(eh->ether_type)) {
> > -   case ETHERTYPE_IP: {
> > -   struct ip *ip;
> > +   ether_extract_headers(mp, &ext);
> >  
> > -   m = m_getptr(mp, sizeof(*eh), &hoff);
> > -   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
> > +   vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
> >  
> > -   iphlen = ip->ip_hl << 2;
> > -   ipproto = ip->ip_p;
> > +   if (ext.ip4) {
> > +   iphlen = ext.ip4->ip_hl << 2;
> >  
> > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
> > if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
> > *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
> > off = 1;
> > }
> > -
> > -   break;
> > -   }
> >  #ifdef INET6
> > -   case ETHERTYPE_IPV6: {
> > -   struct ip6_hdr *ip6;
> > +   } else if (ext.ip6) {
> > +   iphlen = sizeof(*ext.ip6);
> >  
> > -   m = m_getptr(mp, sizeof(*eh), &hoff);
> > -   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
> > -
> > -   iphlen = sizeof(*ip6);
> > -   ipproto = ip6->ip6_nxt;
> > -
> > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
> > -   break;
> > -   }
> >  #endif
> > -   default:
> > +   } else {
> > iphlen = 0;
> > -   ipproto = 0;
> > -   break;
> > }
> >  
> > *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
> > @@ -2464,21 +2445,18 @@ em_tx_ctx_setup(struct em_queue *que, struct mbuf 
> > *mp,
> > vlan_macip_lens |= iphlen;
> > type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
> >  
> > -   switch (ipproto) {
> > -   case IPPROTO_TCP:
> > +   if (ext.tcp) {
> > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
> > if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
> > *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
> > off = 1;
> > }
> > -   break;
> > -   case IPPROTO_UDP:
> > +   } else if (ext.udp) {
> > type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
> > if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
> > *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
> > off = 1;
> > }
> > -   break;
> > }
> >  
> > if (!off)
> > 
> 
> -- 
> Christian "naddy" Weisgerber  na...@mips.inka.de
> 



Re: refactor mbuf parsing on driver level

2023-01-31 Thread Jan Klemkow
On Tue, Jan 31, 2023 at 09:12:51PM +0100, Christian Weisgerber wrote:
> Jan Klemkow:
> 
> >  - I turned the KASSERTS to returns.
> >  - Check if the mbuf is large enough for an ether header.
> >  - additionally #ifdef'd INET6 around the ip6_hdr in the new struct
> 
> For non-initial fragments of TCP/UDP packets, ether_extract_headers()
> will create ext.tcp/ext.udp pointers that do not point to a protocol
> header.  Should there be a check to exclude fragments?

yes.  bluhm also suggested this solution to me.

ok?

Thanks,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.191
diff -u -p -r1.191 if_ix.c
--- dev/pci/if_ix.c 26 Jan 2023 07:32:39 -  1.191
+++ dev/pci/if_ix.c 31 Jan 2023 21:05:40 -
@@ -2477,25 +2477,16 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_extracted ext;
int offload = 0;
uint32_t iphlen;
-   uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   ether_extract_headers(mp, &ext);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
+   *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = ip->ip_hl << 2;
-   ipproto = ip->ip_p;
+   if (ext.ip4) {
+   iphlen = ext.ip4->ip_hl << 2;
 
if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
@@ -2503,46 +2494,30 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = sizeof(*ip6);
-   ipproto = ip6->ip6_nxt;
+   } else if (ext.ip6) {
+   iphlen = sizeof(*ext.ip6);
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
-   break;
-   }
 #endif
-
-   default:
+   } else {
return offload;
}
 
*vlan_macip_lens |= iphlen;
 
-   switch (ipproto) {
-   case IPPROTO_TCP:
+   if (ext.tcp) {
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
offload = 1;
}
-   break;
-   case IPPROTO_UDP:
+   } else if (ext.udp) {
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
offload = 1;
}
-   break;
}
 
return offload;
Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.86
diff -u -p -r1.86 if_ixl.c
--- dev/pci/if_ixl.c26 Jan 2023 07:32:39 -  1.86
+++ dev/pci/if_ixl.c31 Jan 2023 21:05:40 -
@@ -2784,10 +2784,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 static uint64_t
 ixl_tx_setup_offload(struct mbuf *m0)
 {
-   struct mbuf *m;
-   int hoff;
+   struct ether_extracted ext;
uint64_t hlen;
-   uint8_t ipproto;
uint64_t offload = 0;
 
if (ISSET(m0->m_flags, M_VLANTAG)) {
@@ -2800,39 +2798,21 @@ ixl_tx_setup_offload(struct mbuf *m0)
M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
return (offload);
 
-   switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(m0, ETHER_HDR_LEN, &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   ether_extract_headers(m0, &ext);
 
+   if (ext.ip4) {
offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ?
I

Re: refactor mbuf parsing on driver level

2023-01-30 Thread Jan Klemkow
On Fri, Jan 27, 2023 at 04:44:36PM +0100, Christian Weisgerber wrote:
> > The ether_extract_headers() diff was reverted, because is wrong for the
> > cases other than tcp/udp/icmp. We need to fix it and recommit again
> > before continue.
> 
> I think (TCP or) UDP fragments are the problem.  Fragments don't have
> the protocol header but will still end up here:
> 
> case IPPROTO_UDP:
> m = m_getptr(m, hoff + hlen, &hoff);
> KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ext->udp));
> ext->udp = (struct udphdr *)(mtod(m, caddr_t) + hoff);
> break;
> 
> If a tail fragment is too short, it will trigger the KASSERT().
> 
> Previously, this wasn't a problem, because if there was such a
> KASSERT() as in ixl(4), it was behind a M_*_CSUM_OUT check, and we
> never set those flags for fragments.

I changed the diff below to be more robust and reconstructed my test
equipment to build permanently over NFS.

 - I turned the KASSERTS to returns.
 - Check if the mbuf is large enough for an ether header.
 - additionally #ifdef'd INET6 around the ip6_hdr in the new struct

Tested the diff on NFS client and server with several kernel builds.

ok?

Thanks,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.191
diff -u -p -r1.191 if_ix.c
--- dev/pci/if_ix.c 26 Jan 2023 07:32:39 -  1.191
+++ dev/pci/if_ix.c 27 Jan 2023 13:37:13 -
@@ -2477,25 +2477,16 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_extracted ext;
int offload = 0;
uint32_t iphlen;
-   uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   ether_extract_headers(mp, &ext);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
+   *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = ip->ip_hl << 2;
-   ipproto = ip->ip_p;
+   if (ext.ip4) {
+   iphlen = ext.ip4->ip_hl << 2;
 
if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
@@ -2503,46 +2494,30 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = sizeof(*ip6);
-   ipproto = ip6->ip6_nxt;
+   } else if (ext.ip6) {
+   iphlen = sizeof(*ext.ip6);
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
-   break;
-   }
 #endif
-
-   default:
+   } else {
return offload;
}
 
*vlan_macip_lens |= iphlen;
 
-   switch (ipproto) {
-   case IPPROTO_TCP:
+   if (ext.tcp) {
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
offload = 1;
}
-   break;
-   case IPPROTO_UDP:
+   } else if (ext.udp) {
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
offload = 1;
}
-   break;
}
 
return offload;
Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.86
diff -u -p -r1.86 if_ixl.c
--- dev/pci/if_ixl.c26 Jan 2023 07:32:39 -  1.86
+++ dev/pci/if_ixl.c27 Jan 2023 13:37:13 -
@@ -2784,10 +2784,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 static uint64_t
 ixl_tx_setup_offload(struct mbuf *m0)
 {
-   struct mbuf *m;
-   int hoff;
+   struct ether_extracted ext;
uint64_t hlen;
-   uint8_t ipproto;
uint64_t offload = 0;
 
if (ISSET(m0->m_flags, M_VLANTAG)) {
@@ -2800,39 +2798,21 @@ ixl_tx_setup_offload(struct mbuf *m0)
M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
  

Re: refactor mbuf parsing on driver level

2023-01-26 Thread Jan Klemkow
On Thu, Jan 26, 2023 at 02:06:28PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Jan 26, 2023 at 11:37:51AM +0100, Christian Weisgerber wrote:
> > Jan Klemkow:
> > 
> > > we have several drivers which have to parse the content of mbufs.  This
> > > diff suggest a central parsing function for this.  Thus, we can reduce
> > > redundant code.
> > > 
> > > I just start with ix(4) and ixl(4) because it was easy to test for me.
> > > But, this could also improve em(4), igc(4), ale(4) and oce(4).
> > 
> > Here's the corresponding change for em(4).
> > This only affects 82575, 82576, i350, and i210.
> > Tested on i210.
> > 
> > ok?
> > 
> 
> The ether_extract_headers() diff was reverted, because is wrong for the
> cases other than tcp/udp/icmp. We need to fix it and recommit again
> before continue.

I'm already working on fixing this mess.  I'll send a new diff soon.

Sorry for the inconvenience,
Jan



Re: refactor mbuf parsing on driver level

2023-01-24 Thread Jan Klemkow
On Tue, Jan 24, 2023 at 05:40:55PM +0300, Vitaliy Makkoveev wrote:
> On Tue, Jan 24, 2023 at 03:14:36PM +0100, Jan Klemkow wrote:
> > On Tue, Jan 24, 2023 at 09:32:53PM +1000, David Gwynne wrote:
> > > On Mon, Jan 23, 2023 at 09:25:34AM +0100, Jan Klemkow wrote:
> > > > On Wed, Jan 18, 2023 at 03:49:25PM -0700, Theo de Raadt wrote:
> > > > > Jan Klemkow  wrote:
> > > > > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote:
> > > > > > > On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote:
> > > > > > > > we have several drivers which have to parse the content of 
> > > > > > > > mbufs.  This
> > > > > > > > diff suggest a central parsing function for this.  Thus, we can 
> > > > > > > > reduce
> > > > > > > > redundant code.
> > > > > > > > 
> > > > > > > > I just start with ix(4) and ixl(4) because it was easy to test 
> > > > > > > > for me.
> > > > > > > > But, this could also improve em(4), igc(4), ale(4) and oce(4).
> > > > > > > > 
> > > > > > > > I'm not sure about the name, the api nor the place of this 
> > > > > > > > code.  So, if
> > > > > > > > someone has a better idea: i'm open to anything.
> > > > > > > 
> > > > > > > I like code this deduplication.
> > > > > > > 
> > > > > > > This newly introduced function doesn't touch ifnet but only 
> > > > > > > extracts
> > > > > > > protocol headers from mbuf(9). I guess mbuf_extract_headers() or
> > > > > > > something like is much better for name with the ern/uipc_mbuf2.c 
> > > > > > > as
> > > > > > > place.
> > > > > > 
> > > > > > Good Point.  Updates diff below.
> > > > > 
> > > > > I agree, "extract" is a better name.  dlg, do you have a comment?
> > > > 
> > > > Whats you opinion about this diff?
> > > 
> > > it makes ix and ixl prettier, so that's a good enough reason to do
> > > it. it should go in net/if_ethersubr.c as ether_extract_headers()
> > > though.
> > > 
> > > could you try using a struct to carry the header pointers around and see
> > > what that looks like?
> > > 
> > > struct ether_extracted {
> > >   struct ether_header *eh;
> > >   struct ip   *ip4;
> > >   struct ip6_hdr  *ip6;
> > >   struct tcphdr   *tcp;
> > >   struct udphdr   *udp;
> > > };
> > > 
> > > void ether_extract_headers(struct mbuf *, struct ether_extracted *);
> > > 
> > > you can add a depth or flags argument if you want to be able to
> > > tell it to return before looking for the tcp/udp headers if you
> > > want.
> 
> Looks better than m_extract_headers(). Since ext->eh is always assigned
> to a non-NULL value below, the "ext->eh = NULL;" is not necessary. Also
> I'm not sure, but is memset() more reliable for `ext' zeroing? Anyway,
> feel free to commit without memset().

OK?

Thanks,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ix.c
--- dev/pci/if_ix.c 2 Sep 2022 14:08:09 -   1.189
+++ dev/pci/if_ix.c 24 Jan 2023 13:34:17 -
@@ -2477,25 +2477,16 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_extracted ext;
int offload = 0;
uint32_t iphlen;
-   uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   ether_extract_headers(mp, &ext);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
+   *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = ip->ip_hl << 2;
-   ipproto = ip->ip_p;
+   

Re: refactor mbuf parsing on driver level

2023-01-24 Thread Jan Klemkow
On Tue, Jan 24, 2023 at 09:32:53PM +1000, David Gwynne wrote:
> On Mon, Jan 23, 2023 at 09:25:34AM +0100, Jan Klemkow wrote:
> > On Wed, Jan 18, 2023 at 03:49:25PM -0700, Theo de Raadt wrote:
> > > Jan Klemkow  wrote:
> > > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote:
> > > > > On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote:
> > > > > > we have several drivers which have to parse the content of mbufs.  
> > > > > > This
> > > > > > diff suggest a central parsing function for this.  Thus, we can 
> > > > > > reduce
> > > > > > redundant code.
> > > > > > 
> > > > > > I just start with ix(4) and ixl(4) because it was easy to test for 
> > > > > > me.
> > > > > > But, this could also improve em(4), igc(4), ale(4) and oce(4).
> > > > > > 
> > > > > > I'm not sure about the name, the api nor the place of this code.  
> > > > > > So, if
> > > > > > someone has a better idea: i'm open to anything.
> > > > > 
> > > > > I like code this deduplication.
> > > > > 
> > > > > This newly introduced function doesn't touch ifnet but only extracts
> > > > > protocol headers from mbuf(9). I guess mbuf_extract_headers() or
> > > > > something like is much better for name with the ern/uipc_mbuf2.c as
> > > > > place.
> > > > 
> > > > Good Point.  Updates diff below.
> > > 
> > > I agree, "extract" is a better name.  dlg, do you have a comment?
> > 
> > What's your opinion about this diff?
> 
> it makes ix and ixl prettier, so that's a good enough reason to do
> it. it should go in net/if_ethersubr.c as ether_extract_headers()
> though.
> 
> could you try using a struct to carry the header pointers around and see
> what that looks like?
> 
> struct ether_extracted {
>   struct ether_header *eh;
>   struct ip   *ip4;
>   struct ip6_hdr  *ip6;
>   struct tcphdr   *tcp;
>   struct udphdr   *udp;
> };
> 
> void ether_extract_headers(struct mbuf *, struct ether_extracted *);
> 
> you can add a depth or flags argument if you want to be able to
> tell it to return before looking for the tcp/udp headers if you
> want.

OK?

Thanks,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ix.c
--- dev/pci/if_ix.c 2 Sep 2022 14:08:09 -   1.189
+++ dev/pci/if_ix.c 24 Jan 2023 13:34:17 -
@@ -2477,25 +2477,16 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_extracted ext;
int offload = 0;
uint32_t iphlen;
-   uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   ether_extract_headers(mp, &ext);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
+   *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = ip->ip_hl << 2;
-   ipproto = ip->ip_p;
+   if (ext.ip4) {
+   iphlen = ext.ip4->ip_hl << 2;
 
if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
@@ -2503,46 +2494,30 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = sizeof(*ip6);
-   ipproto = ip6->ip6_nxt;
+   } else if (ext.ip6) {
+   iphlen = sizeof(*ext.ip6);
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
-   break;
-   }
 #endif
-
-   default:
+   } else {
 

ifconfig(8): fix output of missing ipv6 addresses

2023-01-23 Thread Jan Klemkow
Hi,

ifconfig doesn't print IPv6 addresses if it's used with the media option.

# ifconfig -A
vio0: flags=8843 mtu 1500
...
inet 10.0.1.65 netmask 0xff00 broadcast 10.0.1.255
inet6 fe80::5054:ff:fe6a:b6fd%vio0 prefixlen 64 scopeid 0x1
inet6 fc00:1::1 prefixlen 64
inet 192.168.0.1 netmask 0xff00 broadcast 192.168.0.255

# ifconfig -A media
vio0: flags=8843 mtu 1500
...
supported media:
media autoselect
inet 10.0.1.65 netmask 0xff00 broadcast 10.0.1.255
inet 192.168.0.1 netmask 0xff00 broadcast 192.168.0.255

As the diff below shows, afp is NULL by default, but set to inet if
there is an additional program parameter.  At the end, no specific
address family is assumed if afp is NULL.  Thus, the diff below
introduces a new variable to remember if a specific address family was
set by the user or not for printing all interface addresses.

The regression test of ifconfig(8) is passing with the diff below.

ok?

bye,
Jan

Index: ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.461
diff -u -p -r1.461 ifconfig.c
--- ifconfig.c  18 Jan 2023 21:57:10 -  1.461
+++ ifconfig.c  23 Jan 2023 12:11:30 -
@@ -746,6 +746,7 @@ const struct afswtch {
 };
 
 const struct afswtch *afp; /*the address family being set or asked about*/
+const struct afswtch *pafp;/*the address family being used for printing*/
 
 char joinname[IEEE80211_NWID_LEN];
 size_t joinlen;
@@ -840,7 +841,7 @@ main(int argc, char *argv[])
if (argc > 0) {
for (afp = rafp = afs; rafp->af_name; rafp++)
if (strcmp(rafp->af_name, *argv) == 0) {
-   afp = rafp;
+   pafp = afp = rafp;
argc--;
argv++;
break;
@@ -1216,7 +1217,7 @@ printif(char *name, int ifaliases)
(ifa->ifa_addr->sa_family == AF_INET &&
ifaliases == 0 && noinet == 0))
continue;
-   if ((p = afp) != NULL) {
+   if ((p = pafp) != NULL) {
if (ifa->ifa_addr->sa_family == p->af_af)
p->af_status(1);
} else {
@@ -3514,7 +3515,7 @@ status(int link, struct sockaddr_dl *sdl
 
  proto_status:
if (link == 0) {
-   if ((p = afp) != NULL) {
+   if ((p = pafp) != NULL) {
p->af_status(1);
} else for (p = afs; p->af_name; p++) {
ifr.ifr_addr.sa_family = p->af_af;



Re: refactor mbuf parsing on driver level

2023-01-19 Thread Jan Klemkow
On Thu, Jan 19, 2023 at 02:55:29PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Jan 19, 2023 at 10:40:52AM +0100, Jan Klemkow wrote:
> > On Thu, Jan 19, 2023 at 12:02:29PM +0300, Vitaliy Makkoveev wrote:
> > > On Thu, Jan 19, 2023 at 01:55:57AM +0300, Vitaliy Makkoveev wrote:
> > > > > On 19 Jan 2023, at 01:39, Jan Klemkow  wrote:
> > > > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote:
> > > > >> On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote:
> > > > >>> we have several drivers which have to parse the content of mbufs.  
> > > > >>> This
> > > > >>> diff suggest a central parsing function for this.  Thus, we can 
> > > > >>> reduce
> > > > >>> redundant code.
> > > > >>> 
> > > > >>> I just start with ix(4) and ixl(4) because it was easy to test for 
> > > > >>> me.
> > > > >>> But, this could also improve em(4), igc(4), ale(4) and oce(4).
> > > > >>> 
> > > > >>> I'm not sure about the name, the api nor the place of this code.  
> > > > >>> So, if
> > > > >>> someone has a better idea: i'm open to anything.
> > > > >> 
> > > > >> I like code this deduplication.
> > > > >> 
> > > > >> This newly introduced function doesn't touch ifnet but only extracts
> > > > >> protocol headers from mbuf(9). I guess mbuf_extract_headers() or
> > > > >> something like is much better for name with the ern/uipc_mbuf2.c as
> > > > >> place.
> > > > > 
> > > > > Good Point.  Updates diff below.
> > > > > 
> > > > > +
> > > > > +/* Parse different TCP/IP protocol headers for a quick view inside 
> > > > > an mbuf. */
> > > > > +void
> > > > > +m_exract_headers(struct mbuf *mp, struct ether_header **eh, struct 
> > > > > ip **ip4,
> > > > > +struct ip6_hdr **ip6, struct tcphdr **tcp, struct udphdr **udp)
> > > > > +
> > > > 
> > > > Should be m_extract_headers(). The rest of the diff looks good to me.
> > > 
> > > Please wait.
> > > 
> > > The mandatory nullification of `ip4', `ip6' and other variables passed
> > > to m_exract_headers() is not obvious. It is much better to return
> > > the integer result of extraction like m_tag_copy_chain() does.
> > 
> > Yes, the mandatory nullification seems to be more errorprone.  In my
> > opinion is the number of results it not that useful. You have to check
> > the retuned pointers anyway.
> > 
> > I moved the nullification inside of m_exract_headers().
> 
> This is better. I would also like the last return statement to be removed
> from m_extract_headers() before commit.

Fixed below.  I also applied a suggestion from mpi to not pollute the
namespace with all the headers in mbuf.h: I moved them to uipc_mbuf2.c.

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ix.c
--- dev/pci/if_ix.c 2 Sep 2022 14:08:09 -   1.189
+++ dev/pci/if_ix.c 19 Jan 2023 09:29:10 -
@@ -2477,23 +2477,18 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_header *eh;
+   struct ip *ip;
+   struct ip6_hdr *ip6;
int offload = 0;
uint32_t iphlen;
uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   m_extract_headers(mp, &eh, &ip, &ip6, NULL, NULL);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
+   if (ip) {
iphlen = ip->ip_hl << 2;
ipproto = ip->ip_p;
 
@@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   

Re: refactor mbuf parsing on driver level

2023-01-19 Thread Jan Klemkow
On Thu, Jan 19, 2023 at 12:02:29PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Jan 19, 2023 at 01:55:57AM +0300, Vitaliy Makkoveev wrote:
> > > On 19 Jan 2023, at 01:39, Jan Klemkow  wrote:
> > > 
> > > On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote:
> > >> On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote:
> > >>> we have several drivers which have to parse the content of mbufs.  This
> > >>> diff suggest a central parsing function for this.  Thus, we can reduce
> > >>> redundant code.
> > >>> 
> > >>> I just start with ix(4) and ixl(4) because it was easy to test for me.
> > >>> But, this could also improve em(4), igc(4), ale(4) and oce(4).
> > >>> 
> > >>> I'm not sure about the name, the api nor the place of this code.  So, if
> > >>> someone has a better idea: i'm open to anything.
> > >> 
> > >> I like code this deduplication.
> > >> 
> > >> This newly introduced function doesn't touch ifnet but only extracts
> > >> protocol headers from mbuf(9). I guess mbuf_extract_headers() or
> > >> something like is much better for name with the ern/uipc_mbuf2.c as
> > >> place.
> > > 
> > > Good Point.  Updates diff below.
> > > 
> > > +
> > > +/* Parse different TCP/IP protocol headers for a quick view inside an 
> > > mbuf. */
> > > +void
> > > +m_exract_headers(struct mbuf *mp, struct ether_header **eh, struct ip 
> > > **ip4,
> > > +struct ip6_hdr **ip6, struct tcphdr **tcp, struct udphdr **udp)
> > > +
> > 
> > Should be m_extract_headers(). The rest of the diff looks good to me.
> > 
> 
> Please wait.
> 
> The mandatory nullification of `ip4', `ip6' and other variables passed
> to m_exract_headers() is not obvious. It is much better to return
> the integer result of extraction like m_tag_copy_chain() does.

Yes, the mandatory nullification seems to be more error-prone.  In my
opinion, the number of results is not that useful.  You have to check
the returned pointers anyway.

I moved the nullification inside of m_extract_headers().

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ix.c
--- dev/pci/if_ix.c 2 Sep 2022 14:08:09 -   1.189
+++ dev/pci/if_ix.c 19 Jan 2023 09:29:10 -
@@ -2477,23 +2477,18 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_header *eh;
+   struct ip *ip;
+   struct ip6_hdr *ip6;
int offload = 0;
uint32_t iphlen;
uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   m_extract_headers(mp, &eh, &ip, &ip6, NULL, NULL);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
+   if (ip) {
iphlen = ip->ip_hl << 2;
ipproto = ip->ip_p;
 
@@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
-
+   } else if (ip6) {
iphlen = sizeof(*ip6);
ipproto = ip6->ip6_nxt;
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
-   break;
-   }
 #endif
-
-   default:
+   } else {
return offload;
}
 
Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_ixl.c
--- dev/pci/if_ixl.c5 Aug 2022 13:57:16 -   1.84
+++ dev/pci/if_ixl.c19 Jan 2023 09:29:17 -
@@ -2784,8 +2784,10 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 static uint64_t
 ixl_tx_setup_offload(struct

Re: refactor mbuf parsing on driver level

2023-01-18 Thread Jan Klemkow
On Wed, Jan 18, 2023 at 10:50:25AM +0300, Vitaliy Makkoveev wrote:
> On Tue, Jan 17, 2023 at 11:09:17PM +0100, Jan Klemkow wrote:
> > we have several drivers which have to parse the content of mbufs.  This
> > diff suggest a central parsing function for this.  Thus, we can reduce
> > redundant code.
> > 
> > I just start with ix(4) and ixl(4) because it was easy to test for me.
> > But, this could also improve em(4), igc(4), ale(4) and oce(4).
> > 
> > I'm not sure about the name, the api nor the place of this code.  So, if
> > someone has a better idea: i'm open to anything.
> 
> I like this code deduplication.
> 
> This newly introduced function doesn't touch ifnet but only extracts
> protocol headers from mbuf(9). I guess mbuf_extract_headers() or
> something like that is a much better name, with kern/uipc_mbuf2.c as
> the place.

Good point.  Updated diff below.

Thanks,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ix.c
--- dev/pci/if_ix.c 2 Sep 2022 14:08:09 -   1.189
+++ dev/pci/if_ix.c 18 Jan 2023 21:06:58 -
@@ -2477,23 +2477,18 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_header *eh = NULL;
+   struct ip *ip = NULL;
+   struct ip6_hdr *ip6 = NULL;
int offload = 0;
uint32_t iphlen;
uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   m_exract_headers(mp, &eh, &ip, &ip6, NULL, NULL);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
+   if (ip) {
iphlen = ip->ip_hl << 2;
ipproto = ip->ip_p;
 
@@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
-
+   } else if (ip6) {
iphlen = sizeof(*ip6);
ipproto = ip6->ip6_nxt;
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
-   break;
-   }
 #endif
-
-   default:
+   } else {
return offload;
}
 
Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_ixl.c
--- dev/pci/if_ixl.c5 Aug 2022 13:57:16 -   1.84
+++ dev/pci/if_ixl.c18 Jan 2023 20:47:01 -
@@ -2784,12 +2784,15 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 static uint64_t
 ixl_tx_setup_offload(struct mbuf *m0)
 {
-   struct mbuf *m;
-   int hoff;
+   struct ether_header *eh = NULL;
+   struct ip *ip = NULL;
+   struct ip6_hdr *ip6 = NULL;
+   struct tcphdr *th = NULL;
uint64_t hlen;
uint8_t ipproto;
uint64_t offload = 0;
 
+
if (ISSET(m0->m_flags, M_VLANTAG)) {
uint64_t vtag = m0->m_pkthdr.ether_vtag;
offload |= IXL_TX_DESC_CMD_IL2TAG1;
@@ -2800,39 +2803,23 @@ ixl_tx_setup_offload(struct mbuf *m0)
M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
return (offload);
 
-   switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(m0, ETHER_HDR_LEN, &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   m_exract_headers(m0, &eh, &ip, &ip6, &th, NULL);
 
+   if (ip) {
offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ?
IXL_TX_DESC_CMD_IIPT_IPV4_CSUM :
IXL_TX_DESC_CMD_IIPT_IPV4;
  
hlen = ip->ip_hl << 2;
ipproto = ip->ip_p;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m

Re: mem.4: be more accurate about securelevel

2023-01-18 Thread Jan Klemkow
On Tue, Jan 17, 2023 at 11:02:07PM +0100, Theo Buehler wrote:
> > at least this tool works for me:
> 
> Surely you have kern.allowkmem=1 set.

This diff should phrase it correctly.

ok?

Thanks,
Jan

Index: man4.alpha/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.alpha/mem.4,v
retrieving revision 1.6
diff -u -p -r1.6 mem.4
--- man4.alpha/mem.412 Jan 2018 04:36:44 -  1.6
+++ man4.alpha/mem.418 Jan 2023 19:25:27 -
@@ -63,11 +63,12 @@ kernel virtual memory begins at
 .Pp
 Even with sufficient file system permissions,
 these devices can only be opened when the
-.Xr securelevel 7
-is insecure or when the
 .Va kern.allowkmem
 .Xr sysctl 2
 variable is set.
+Also the
+.Xr securelevel 7
+insecure is needed, to open the device writable.
 .Sh FILES
 .Bl -tag -width /dev/kmem -compact
 .It /dev/mem
Index: man4.amd64/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.amd64/mem.4,v
retrieving revision 1.6
diff -u -p -r1.6 mem.4
--- man4.amd64/mem.412 Jan 2018 04:36:44 -  1.6
+++ man4.amd64/mem.418 Jan 2023 19:26:59 -
@@ -64,11 +64,12 @@ The kernel virtual memory begins at addr
 .Pp
 Even with sufficient file system permissions,
 these devices can only be opened when the
-.Xr securelevel 7
-is insecure or when the
 .Va kern.allowkmem
 .Xr sysctl 2
 variable is set.
+Also the
+.Xr securelevel 7
+insecure is needed, to open the device writable.
 .Sh FILES
 .Bl -tag -width Pa -compact
 .It Pa /dev/mem
Index: man4.hppa/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.hppa/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.hppa/mem.4 12 Jan 2018 04:36:44 -  1.4
+++ man4.hppa/mem.4 18 Jan 2023 19:29:07 -
@@ -52,11 +52,12 @@ address 0; kernel virtual memory begins 
 .Pp
 Even with sufficient file system permissions,
 these devices can only be opened when the
-.Xr securelevel 7
-is insecure or when the
 .Va kern.allowkmem
 .Xr sysctl 2
 variable is set.
+Also the
+.Xr securelevel 7
+insecure is needed, to open the device writable.
 .Sh FILES
 .Bl -tag -width /dev/kmem -compact
 .It Pa /dev/mem
Index: man4.i386/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.i386/mem.4,v
retrieving revision 1.12
diff -u -p -r1.12 mem.4
--- man4.i386/mem.4 12 Jan 2018 04:36:44 -  1.12
+++ man4.i386/mem.4 18 Jan 2023 19:30:18 -
@@ -64,11 +64,12 @@ long, and ends at virtual address
 .Pp
 Even with sufficient file system permissions,
 these devices can only be opened when the
-.Xr securelevel 7
-is insecure or when the
 .Va kern.allowkmem
 .Xr sysctl 2
 variable is set.
+Also the
+.Xr securelevel 7
+insecure is needed, to open the device writable.
 .Sh FILES
 .Bl -tag -width Pa -compact
 .It Pa /dev/mem
Index: man4.landisk/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.landisk/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.landisk/mem.4  12 Jan 2018 04:36:44 -  1.4
+++ man4.landisk/mem.4  18 Jan 2023 19:31:28 -
@@ -59,11 +59,12 @@ The kernel virtual memory begins at addr
 .Pp
 Even with sufficient file system permissions,
 these devices can only be opened when the
-.Xr securelevel 7
-is insecure or when the
 .Va kern.allowkmem
 .Xr sysctl 2
 variable is set.
+Also the
+.Xr securelevel 7
+insecure is needed, to open the device writable.
 .Sh FILES
 .Bl -tag -width Pa -compact
 .It Pa /dev/mem
Index: man4.loongson/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.loongson/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.loongson/mem.4 12 Jan 2018 04:36:44 -  1.4
+++ man4.loongson/mem.4 18 Jan 2023 19:32:44 -
@@ -89,11 +89,12 @@ The kernel virtual memory begins at addr
 .Pp
 Even with sufficient file system permissions,
 these devices can only be opened when the
-.Xr securelevel 7
-is insecure or when the
 .Va kern.allowkmem
 .Xr sysctl 2
 variable is set.
+Also the
+.Xr securelevel 7
+insecure is needed, to open the device writable.
 .Sh FILES
 .Bl -tag -width Pa -compact
 .It Pa /dev/mem
Index: man4.luna88k/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.luna88k/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.luna88k/mem.4  12 Jan 2018 04:36:44 -  1.4
+++ man4.luna88k/mem.4  18 Jan 2023 19:33:50 -
@@ -63,11 +63,12 @@ kernel virtual memory begins at
 .Pp
 Even with sufficient file system permissions,
 these devices can only be opened when the
-.Xr securelevel 7
-is insecure or when the
 .Va kern.allowkmem
 .Xr sysctl 2
 variable is set.
+Also the
+.Xr securelevel 7
+insecure is needed, to open the device writable.
 .Sh FILES
 .Bl -tag

Re: mem.4: be more accurate about securelevel

2023-01-17 Thread Jan Klemkow
On Tue, Jan 17, 2023 at 11:02:07PM +0100, Theo Buehler wrote:
> > at least this tool works for me:
> 
> Surely you have kern.allowkmem=1 set.

Yes, I do.



refactor mbuf parsing on driver level

2023-01-17 Thread Jan Klemkow
Hi,

we have several drivers which have to parse the content of mbufs.  This
diff suggests a central parsing function for this.  Thus, we can reduce
redundant code.

I just started with ix(4) and ixl(4) because it was easy to test for me.
But, this could also improve em(4), igc(4), ale(4) and oce(4).

I'm not sure about the name, the API, or the place of this code.  So, if
someone has a better idea: I'm open to anything.

bye,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ix.c
--- dev/pci/if_ix.c 2 Sep 2022 14:08:09 -   1.189
+++ dev/pci/if_ix.c 17 Jan 2023 16:31:19 -
@@ -2477,23 +2477,18 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
+   struct ether_header *eh = NULL;
+   struct ip *ip = NULL;
+   struct ip6_hdr *ip6 = NULL;
int offload = 0;
uint32_t iphlen;
uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   if_parse(mp, &eh, &ip, &ip6, NULL, NULL);
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
 
+   if (ip) {
iphlen = ip->ip_hl << 2;
ipproto = ip->ip_p;
 
@@ -2503,26 +2498,14 @@ ixgbe_csum_offload(struct mbuf *mp, uint
}
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
-
+   } else if (ip6) {
iphlen = sizeof(*ip6);
ipproto = ip6->ip6_nxt;
 
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
-   break;
-   }
 #endif
-
-   default:
+   } else {
return offload;
}
 
Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_ixl.c
--- dev/pci/if_ixl.c5 Aug 2022 13:57:16 -   1.84
+++ dev/pci/if_ixl.c16 Jan 2023 23:58:05 -
@@ -2784,12 +2784,15 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 static uint64_t
 ixl_tx_setup_offload(struct mbuf *m0)
 {
-   struct mbuf *m;
-   int hoff;
+   struct ether_header *eh = NULL;
+   struct ip *ip = NULL;
+   struct ip6_hdr *ip6 = NULL;
+   struct tcphdr *th = NULL;
uint64_t hlen;
uint8_t ipproto;
uint64_t offload = 0;
 
+
if (ISSET(m0->m_flags, M_VLANTAG)) {
uint64_t vtag = m0->m_pkthdr.ether_vtag;
offload |= IXL_TX_DESC_CMD_IL2TAG1;
@@ -2800,39 +2803,23 @@ ixl_tx_setup_offload(struct mbuf *m0)
M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
return (offload);
 
-   switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(m0, ETHER_HDR_LEN, &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   if_parse(m0, &eh, &ip, &ip6, &th, NULL);
 
+   if (ip) {
offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ?
IXL_TX_DESC_CMD_IIPT_IPV4_CSUM :
IXL_TX_DESC_CMD_IIPT_IPV4;
  
hlen = ip->ip_hl << 2;
ipproto = ip->ip_p;
-   break;
-   }
-
 #ifdef INET6
-   case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(m0, ETHER_HDR_LEN, &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
- 
+   } else if (ip6) {
offload |= IXL_TX_DESC_CMD_IIPT_IPV6;
 
hlen = sizeof(*ip6);
ipproto = ip6->ip6_nxt;
-   break;
-   }
 #endif
-   default:
+   } else {
panic("CSUM_OUT set for non-IP packet");
/* NOTREACHED */
}
@@ -2842,15 +2829,12 @@ ixl_tx_setup_offload(struct mbuf *m0)
 
switch (ipproto) {
case IPPROTO_TCP: {
-   struct tcphdr *th;
-
if (!ISSET(m0->m

Re: mem.4: be more accurate about securelevel

2023-01-17 Thread Jan Klemkow
On Tue, Jan 17, 2023 at 04:23:48PM -0500, Bryan Steele wrote:
> On Tue, Jan 17, 2023 at 09:37:24PM +0100, Jan Klemkow wrote:
> > Hi,
> > 
> > This diff adjust the manpage of mem(4) to be more accurate.  You can
> > open(2) mem(4) in securelevel 1 in readonly mode, but not writable.
> > 
> > kern/spec_vnops.c:
> > 
> > if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
> > ...
> > /*
> >  * When running in secure mode, do not allow opens
> >  * for writing of /dev/mem, /dev/kmem, or character
> >  * devices whose corresponding block devices are
> >  * currently mounted.
> >  */
> > if (securelevel >= 1) {
> > ...
> > if (iskmemdev(dev))
> > return (EPERM);
> > }
> > }
> > 
> > OK?
> > 
> > bye,
> > Jan
> 
> Are you sure about that? Have you tested it?
> 
> https://github.com/openbsd/src/commit/19aedf236181e81baf170421900911c82671fae4

at least this tool works for me:

#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <paths.h>
#include <stdio.h>
#include <stdlib.h>

#include <kvm.h>

/*
 * Small test tool: open the kernel memory via libkvm in read-write mode,
 * look up the kernel symbol _ix_debug_ioctl, print its current int value
 * and then overwrite it with 1.
 *
 * Exits with EXIT_SUCCESS only if every step worked; any failure prints a
 * diagnostic and exits with EXIT_FAILURE.
 */
int
main(void)
{
	kvm_t *kd;
	int mem;
	int nmiss;
	struct nlist nl[] = {
		{"_ix_debug_ioctl"},
		{NULL}
	};
	char errbuf[_POSIX2_LINE_MAX];

	/* O_RDWR is the point of the test: the open must succeed writable. */
	if ((kd = kvm_open(_PATH_KSYMS, NULL, NULL, O_RDWR, errbuf)) == NULL)
		errx(EXIT_FAILURE, "%s", errbuf);

	/*
	 * kvm_nlist() returns -1 if the symbol table is unreadable and
	 * otherwise the number of entries it could not resolve.  An
	 * unresolved entry keeps n_value == 0, so do not go on and
	 * read/write address 0 in that case.
	 */
	nmiss = kvm_nlist(kd, nl);
	if (nmiss == -1)
		errx(EXIT_FAILURE, "%s", kvm_geterr(kd));
	if (nmiss != 0)
		errx(EXIT_FAILURE, "symbol %s not found", "_ix_debug_ioctl");

	if (kvm_read(kd, nl[0].n_value, &mem, sizeof mem) != sizeof(mem))
		errx(EXIT_FAILURE, "%s", kvm_geterr(kd));

	printf("mem: %d\n", mem);

	mem = 1;

	if (kvm_write(kd, nl[0].n_value, &mem, sizeof mem) != sizeof(mem))
		errx(EXIT_FAILURE, "%s", kvm_geterr(kd));

	if (kvm_close(kd) == -1)
		err(EXIT_FAILURE, "kvm_close");

	return EXIT_SUCCESS;
}



mem.4: be more accurate about securelevel

2023-01-17 Thread Jan Klemkow
Hi,

This diff adjusts the manpage of mem(4) to be more accurate.  You can
open(2) mem(4) in securelevel 1 in readonly mode, but not writable.

kern/spec_vnops.c:

if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
...
/*
 * When running in secure mode, do not allow opens
 * for writing of /dev/mem, /dev/kmem, or character
 * devices whose corresponding block devices are
 * currently mounted.
 */
if (securelevel >= 1) {
...
if (iskmemdev(dev))
return (EPERM);
}
}

OK?

bye,
Jan

Index: man4.alpha/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.alpha/mem.4,v
retrieving revision 1.6
diff -u -p -r1.6 mem.4
--- man4.alpha/mem.412 Jan 2018 04:36:44 -  1.6
+++ man4.alpha/mem.417 Jan 2023 18:51:10 -
@@ -62,7 +62,7 @@ kernel virtual memory begins at
 .Li 0xfc23 .
 .Pp
 Even with sufficient file system permissions,
-these devices can only be opened when the
+these devices can only be opened writable when the
 .Xr securelevel 7
 is insecure or when the
 .Va kern.allowkmem
Index: man4.amd64/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.amd64/mem.4,v
retrieving revision 1.6
diff -u -p -r1.6 mem.4
--- man4.amd64/mem.412 Jan 2018 04:36:44 -  1.6
+++ man4.amd64/mem.417 Jan 2023 18:48:23 -
@@ -63,7 +63,7 @@ The kernel virtual memory begins at addr
 .Li 0x8000 .
 .Pp
 Even with sufficient file system permissions,
-these devices can only be opened when the
+these devices can only be opened writable when the
 .Xr securelevel 7
 is insecure or when the
 .Va kern.allowkmem
Index: man4.hppa/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.hppa/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.hppa/mem.4 12 Jan 2018 04:36:44 -  1.4
+++ man4.hppa/mem.4 17 Jan 2023 18:52:28 -
@@ -51,7 +51,7 @@ On hppa, the physical memory range is al
 address 0; kernel virtual memory begins at address 0 as well.
 .Pp
 Even with sufficient file system permissions,
-these devices can only be opened when the
+these devices can only be opened writable when the
 .Xr securelevel 7
 is insecure or when the
 .Va kern.allowkmem
Index: man4.i386/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.i386/mem.4,v
retrieving revision 1.12
diff -u -p -r1.12 mem.4
--- man4.i386/mem.4 12 Jan 2018 04:36:44 -  1.12
+++ man4.i386/mem.4 17 Jan 2023 18:53:00 -
@@ -63,7 +63,7 @@ long, and ends at virtual address
 .Li 0xfe00 .
 .Pp
 Even with sufficient file system permissions,
-these devices can only be opened when the
+these devices can only be opened writable when the
 .Xr securelevel 7
 is insecure or when the
 .Va kern.allowkmem
Index: man4.landisk/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.landisk/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.landisk/mem.4  12 Jan 2018 04:36:44 -  1.4
+++ man4.landisk/mem.4  17 Jan 2023 18:53:54 -
@@ -58,7 +58,7 @@ The kernel virtual memory begins at addr
 .Li 0xc000 .
 .Pp
 Even with sufficient file system permissions,
-these devices can only be opened when the
+these devices can only be opened writable when the
 .Xr securelevel 7
 is insecure or when the
 .Va kern.allowkmem
Index: man4.loongson/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.loongson/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.loongson/mem.4 12 Jan 2018 04:36:44 -  1.4
+++ man4.loongson/mem.4 17 Jan 2023 18:54:33 -
@@ -88,7 +88,7 @@ The kernel virtual memory begins at addr
 .Ad 0xc000 .
 .Pp
 Even with sufficient file system permissions,
-these devices can only be opened when the
+these devices can only be opened writable when the
 .Xr securelevel 7
 is insecure or when the
 .Va kern.allowkmem
Index: man4.luna88k/mem.4
===
RCS file: /cvs/src/share/man/man4/man4.luna88k/mem.4,v
retrieving revision 1.4
diff -u -p -r1.4 mem.4
--- man4.luna88k/mem.4  12 Jan 2018 04:36:44 -  1.4
+++ man4.luna88k/mem.4  17 Jan 2023 18:54:47 -
@@ -62,7 +62,7 @@ kernel virtual memory begins at
 .Ad 0x .
 .Pp
 Even with sufficient file system permissions,
-these devices can only be opened when the
+these devices can only be opened writable when the
 .Xr securelevel 7
 is insecure or when the
 .Va kern.allowkmem
Index: man4.macppc/mem.4
===

Fix kernel build without IPSEC option

2022-11-02 Thread Jan Klemkow
Hi,

If you build the kernel without IPSEC, it will run into several compiler
and linker errors.  This diff adds some missing #ifdefs to fix this.

ok?

bye,
jan

Index: net/if_pfsync.c
===
RCS file: /mount/openbsd/cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.305
diff -u -p -r1.305 if_pfsync.c
--- net/if_pfsync.c 21 Apr 2022 15:22:49 -  1.305
+++ net/if_pfsync.c 2 Nov 2022 10:20:38 -
@@ -1576,7 +1576,9 @@ pfsync_grab_snapshot(struct pfsync_snaps
int q;
struct pf_state *st;
struct pfsync_upd_req_item *ur;
+#if defined(IPSEC)
struct tdb *tdb;
+#endif
 
sn->sn_sc = sc;
 
@@ -1602,6 +1604,7 @@ pfsync_grab_snapshot(struct pfsync_snaps
}
 
TAILQ_INIT(&sn->sn_tdb_q);
+#if defined(IPSEC)
while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry);
TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap);
@@ -1611,6 +1614,7 @@ pfsync_grab_snapshot(struct pfsync_snaps
SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
mtx_leave(&tdb->tdb_mtx);
}
+#endif
 
sn->sn_len = sc->sc_len;
sc->sc_len = PFSYNC_MINPKT;
@@ -1630,7 +1634,9 @@ pfsync_drop_snapshot(struct pfsync_snaps
 {
struct pf_state *st;
struct pfsync_upd_req_item *ur;
+#if defined(IPSEC)
struct tdb *t;
+#endif
int q;
 
for (q = 0; q < PFSYNC_S_COUNT; q++) {
@@ -1652,6 +1658,7 @@ pfsync_drop_snapshot(struct pfsync_snaps
pool_put(&sn->sn_sc->sc_pool, ur);
}
 
+#if defined(IPSEC)
while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) {
TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap);
mtx_enter(&t->tdb_mtx);
@@ -1660,6 +1667,7 @@ pfsync_drop_snapshot(struct pfsync_snaps
CLR(t->tdb_flags, TDBF_PFSYNC);
mtx_leave(&t->tdb_mtx);
}
+#endif
 }
 
 int
@@ -1748,7 +1756,6 @@ pfsync_sendout(void)
struct pfsync_subheader *subh;
struct pf_state *st;
struct pfsync_upd_req_item *ur;
-   struct tdb *t;
int offset;
int q, count = 0;
 
@@ -1842,7 +1849,10 @@ pfsync_sendout(void)
sn.sn_plus = NULL;  /* XXX memory leak ? */
}
 
+#if defined(IPSEC)
if (!TAILQ_EMPTY(&sn.sn_tdb_q)) {
+   struct tdb *t;
+
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
 
@@ -1865,6 +1875,7 @@ pfsync_sendout(void)
subh->len = sizeof(struct pfsync_tdb) >> 2;
subh->count = htons(count);
}
+#endif
 
/* walk the queues */
for (q = 0; q < PFSYNC_S_COUNT; q++) {
@@ -2486,6 +2497,7 @@ pfsync_q_del(struct pf_state *st)
pf_state_unref(st);
 }
 
+#if defined(IPSEC)
 void
 pfsync_update_tdb(struct tdb *t, int output)
 {
@@ -2540,7 +2552,9 @@ pfsync_update_tdb(struct tdb *t, int out
CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
mtx_leave(&t->tdb_mtx);
 }
+#endif
 
+#if defined(IPSEC)
 void
 pfsync_delete_tdb(struct tdb *t)
 {
@@ -2576,6 +2590,7 @@ pfsync_delete_tdb(struct tdb *t)
 
tdb_unref(t);
 }
+#endif
 
 void
 pfsync_out_tdb(struct tdb *t, void *buf)
Index: netinet/ip_ipsp.c
===
RCS file: /mount/openbsd/cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.273
diff -u -p -r1.273 ip_ipsp.c
--- netinet/ip_ipsp.c   6 Aug 2022 15:57:59 -   1.273
+++ netinet/ip_ipsp.c   2 Nov 2022 12:09:22 -
@@ -1081,7 +1081,7 @@ tdb_free(struct tdb *tdbp)
tdbp->tdb_xform = NULL;
}
 
-#if NPFSYNC > 0
+#if NPFSYNC > 0 && defined(IPSEC)
/* Cleanup pfsync references */
pfsync_delete_tdb(tdbp);
 #endif



if_parse_packet(): refactor packet parsing on driver level

2022-10-24 Thread Jan Klemkow
Hi,

We have a lot of redundant code in the network device driver layer that
parses the content of mbufs for Ethernet, IP and TCP headers.  This diff
introduces a new function if_parse_packet() to centralize this feature.
It just refactors the ix(4) and ixl(4) code because I could test these cards
and didn't want to blow up this diff.  But igc(4), ale(4) and oce(4) could also
be improved with this.  Besides refactoring, we'll need this kind of
code in ix(4) and other drivers for better checksum and TSO support.

I'm not sure about the correct naming or place for this helper function.
Thus, nitpicking and bikeshedding are welcome. :)

bye,
Jan

Index: dev/pci/if_ix.c
===
RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.189
diff -u -p -r1.189 if_ix.c
--- dev/pci/if_ix.c 2 Sep 2022 14:08:09 -   1.189
+++ dev/pci/if_ix.c 24 Oct 2022 13:51:22 -
@@ -2477,25 +2477,18 @@ static inline int
 ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens,
 uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status)
 {
-   struct ether_header *eh = mtod(mp, struct ether_header *);
-   struct mbuf *m;
-   int hoff;
int offload = 0;
-   uint32_t iphlen;
uint8_t ipproto;
 
-   *vlan_macip_lens |= (sizeof(*eh) << IXGBE_ADVTXD_MACLEN_SHIFT);
+   struct if_hdr hdr;
 
-   switch (ntohs(eh->ether_type)) {
-   case ETHERTYPE_IP: {
-   struct ip *ip;
+   if_parse_packet(mp, &hdr);
 
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+   *vlan_macip_lens |= (hdr.l2len << IXGBE_ADVTXD_MACLEN_SHIFT);
 
-   iphlen = ip->ip_hl << 2;
-   ipproto = ip->ip_p;
+   switch (ntohs(hdr.eth->ether_type)) {
+   case ETHERTYPE_IP: {
+   ipproto = hdr.ip4->ip_p;
 
if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
@@ -2508,15 +2501,7 @@ ixgbe_csum_offload(struct mbuf *mp, uint
 
 #ifdef INET6
case ETHERTYPE_IPV6: {
-   struct ip6_hdr *ip6;
-
-   m = m_getptr(mp, sizeof(*eh), &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip6));
-   ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
-
-   iphlen = sizeof(*ip6);
-   ipproto = ip6->ip6_nxt;
-
+   ipproto = hdr.ip6->ip6_nxt;
*type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
break;
}
@@ -2526,7 +2511,7 @@ ixgbe_csum_offload(struct mbuf *mp, uint
return offload;
}
 
-   *vlan_macip_lens |= iphlen;
+   *vlan_macip_lens |= hdr.l3len;
 
switch (ipproto) {
case IPPROTO_TCP:
Index: dev/pci/if_ixgb.h
===
RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixgb.h,v
retrieving revision 1.19
diff -u -p -r1.19 if_ixgb.h
--- dev/pci/if_ixgb.h   24 Nov 2015 17:11:39 -  1.19
+++ dev/pci/if_ixgb.h   24 Oct 2022 13:27:43 -
@@ -54,6 +54,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
Index: dev/pci/if_ixl.c
===
RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_ixl.c
--- dev/pci/if_ixl.c5 Aug 2022 13:57:16 -   1.84
+++ dev/pci/if_ixl.c24 Oct 2022 16:34:29 -
@@ -2784,11 +2784,12 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 static uint64_t
 ixl_tx_setup_offload(struct mbuf *m0)
 {
-   struct mbuf *m;
-   int hoff;
-   uint64_t hlen;
uint8_t ipproto;
uint64_t offload = 0;
+   struct if_hdr hdr;
+
+   memset(&hdr, 0, sizeof(hdr));
+   if_parse_packet(m0, &hdr);
 
if (ISSET(m0->m_flags, M_VLANTAG)) {
uint64_t vtag = m0->m_pkthdr.ether_vtag;
@@ -2800,35 +2801,20 @@ ixl_tx_setup_offload(struct mbuf *m0)
M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
return (offload);
 
-   switch (ntohs(mtod(m0, struct ether_header *)->ether_type)) {
+   switch (ntohs(hdr.eth->ether_type)) {
case ETHERTYPE_IP: {
-   struct ip *ip;
-
-   m = m_getptr(m0, ETHER_HDR_LEN, &hoff);
-   KASSERT(m != NULL && m->m_len - hoff >= sizeof(*ip));
-   ip = (struct ip *)(mtod(m, caddr_t) + hoff);
-
offload |= ISSET(m0->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ?
IXL_TX_DESC_CMD_IIPT_IPV4_CSUM :
IXL_TX_DESC_CMD_IIPT_IPV4;
  
-   hlen = ip->ip_hl << 2;
-   ipproto = ip->ip_p;
+   ipproto = hdr.ip4->ip_p;
break;
}
 
 #ifdef INET6
   

Re: ix(4): enable TCPv6/UDPv6 cksum offloading

2022-01-12 Thread Jan Klemkow
On Wed, Jan 12, 2022 at 05:54:07PM +0100, Mark Kettenis wrote:
> > Date: Wed, 12 Jan 2022 17:45:57 +0100
> > From: Jan Klemkow 
> > 
> > On Wed, Jan 12, 2022 at 05:36:01PM +0100, Mark Kettenis wrote:
> > > > Date: Wed, 12 Jan 2022 17:02:03 +0100
> > > > From: Jan Klemkow 
> > > > 
> > > > Hi,
> > > > 
> > > > This diff enables TCP and UDP checksum offloading in ix(4) for IPv6.
> > > > 
> > > > IPv6 extension headers aren't a problem in this case.
> > > > in6_proto_cksum_out() in netinet6/ip6_output.c disables checksum
> > > > offloading if ip6_nxt is not TCP or UDP.  Thus, we can just use this
> > > > field.
> > > > 
> > > > Tested with:
> > > > ix0 at pci5 dev 0 function 0 "Intel 82599" rev 0x01, msix, 8 queues, 
> > > > address 00:1b:21:94:4c:48
> > > > 
> > > > OK?
> > > 
> > > Isn't this the same disaster as the ixl(4) diff you sent earlier?  We
> > > have sparc64 machines with onboard ix(4)...
> > 
> > Yes, but we don't parse the TCP header here.  As bluhm@ figured out:
> > The access to ip_hl does not generate an alignment problem on sparc64.
> > Because, the bits of ip_hl are on the other of the byte, as bits of
> > th_off.
> > 
> > This diff just touches the IPv6 case, where we don't have this kind of
> > problem, anyway.
> 
> But you're still using m_getptr(), casting the result to a struct and
> then look at a member of the struct, which may access data beyond the
> end of the mbuf.

We use the same pattern in re(4), vio(4) and the IPv4 case in ix(4).
And we check for this case with the KASSERT(), except re(4).

To me, it looks as if this assumption is safe.

> > > > Index: dev/pci/if_ix.c
> > > > ===
> > > > RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
> > > > retrieving revision 1.180
> > > > diff -u -p -r1.180 if_ix.c
> > > > --- dev/pci/if_ix.c 27 Jul 2021 01:44:55 -  1.180
> > > > +++ dev/pci/if_ix.c 12 Jan 2022 14:53:14 -
> > > > @@ -1879,7 +1879,8 @@ ixgbe_setup_interface(struct ix_softc *s
> > > > ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
> > > >  #endif
> > > >  
> > > > -   ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
> > > > +   ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4
> > > > +   | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
> > > >  
> > > > /*
> > > >  * Specify the media types supported by this sc and register
> > > > @@ -2438,9 +2439,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
> > > > struct ether_header *eh;
> > > >  #endif
> > > > struct ip *ip;
> > > > -#ifdef notyet
> > > > struct ip6_hdr *ip6;
> > > > -#endif
> > > > struct mbuf *m;
> > > > int ipoff;
> > > > uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
> > > > @@ -2521,19 +2520,16 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, 
> > > > ipproto = ip->ip_p;
> > > > type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
> > > > break;
> > > > -#ifdef notyet
> > > > case ETHERTYPE_IPV6:
> > > > if (mp->m_pkthdr.len < ehdrlen + sizeof(*ip6))
> > > > return (-1);
> > > > m = m_getptr(mp, ehdrlen, &ipoff);
> > > > KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip6));
> > > > -   ip6 = (struct ip6 *)(m->m_data + ipoff);
> > > > +   ip6 = (struct ip6_hdr *)(m->m_data + ipoff);
> > > > ip_hlen = sizeof(*ip6);
> > > > -   /* XXX-BZ this will go badly in case of ext hdrs. */
> > > > ipproto = ip6->ip6_nxt;
> > > > type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
> > > > break;
> > > > -#endif
> > > > default:
> > > > offload = FALSE;
> > > > break;
> > > > Index: dev/pci/ixgbe.h
> > > > ===
> > > > RCS file: /mount/openbsd/cvs/src/sys/dev/pci/ixgbe.h,v
> > > > retrieving revision 1.32
> > > > diff -u -p -r1.32 ixgbe.h
> > > > --- dev/pci/ixgbe.h 18 Jul 2020 07:18:22 -  1.32
> > > > +++ dev/pci/ixgbe.h 12 Jan 2022 14:57:13 -
> > > > @@ -65,6 +65,7 @@
> > > >  #include 
> > > >  #include 
> > > >  #include 
> > > > +#include 
> > > >  
> > > >  #if NBPFILTER > 0
> > > >  #include 
> > > > 
> > > > 
> > > 
> > 
> 



diff: improve legibility of structs in several manpages

2021-10-26 Thread Jan Klemkow
Hello,

This diff harmonises the indentation of struct members and comments in
several manpages.  It also fixes line wraps of comments on 80-column
terminals.  It generally uses tabs for indentation and 4 spaces in
tight spots.  It also uses an extra space to align pointers and
non-pointers, as we do in certain places in our source.

OK?

bye,
Jan

Index: sys/kbind.2
===
RCS file: /cvs/src/lib/libc/sys/kbind.2,v
retrieving revision 1.3
diff -u -p -r1.3 kbind.2
--- sys/kbind.2 1 Sep 2016 10:08:03 -   1.3
+++ sys/kbind.2 26 Oct 2021 14:15:37 -
@@ -25,7 +25,7 @@
 .Bd -literal
 struct __kbind {
 void*kb_addr;
-size_t  kb_size;
+size_t   kb_size;
 };
 #define KBIND_BLOCK_MAX 2  /* powerpc and sparc64 need 2 blocks */
 #define KBIND_DATA_MAX  24 /* sparc64 needs 6, four-byte words */
Index: sys/kqueue.2
===
RCS file: /cvs/src/lib/libc/sys/kqueue.2,v
retrieving revision 1.44
diff -u -p -r1.44 kqueue.2
--- sys/kqueue.222 Apr 2021 15:30:12 -  1.44
+++ sys/kqueue.226 Oct 2021 14:16:16 -
@@ -141,11 +141,11 @@ The
 structure is defined as:
 .Bd -literal
 struct kevent {
-   uintptr_t  ident;   /* identifier for this event */
-   short  filter;  /* filter for event */
-   u_shortflags;   /* action flags for kqueue */
-   u_int  fflags;  /* filter flag value */
-   int64_tdata;/* filter data value */
+   uintptr_t   ident;  /* identifier for this event */
+   short   filter; /* filter for event */
+   u_short flags;  /* action flags for kqueue */
+   u_int   fflags; /* filter flag value */
+   int64_t data;   /* filter data value */
void   *udata;  /* opaque user data identifier */
 };
 .Ed
Index: sys/ktrace.2
===
RCS file: /cvs/src/lib/libc/sys/ktrace.2,v
retrieving revision 1.38
diff -u -p -r1.38 ktrace.2
--- sys/ktrace.21 Sep 2021 15:51:45 -   1.38
+++ sys/ktrace.226 Oct 2021 14:17:20 -
@@ -136,12 +136,12 @@ followed by a trace point specific struc
 The generic header is:
 .Bd -literal
 struct ktr_header {
-   uintktr_type;   /* trace record type */
-   pid_t   ktr_pid;/* process id */
-   pid_t   ktr_tid;/* thread id */
-   struct  timespec ktr_time;  /* timestamp */
-   charktr_comm[MAXCOMLEN+1];  /* command name */
-   size_t  ktr_len;/* length of buf */
+   uintktr_type;   /* trace record type */
+   pid_t   ktr_pid;/* process id */
+   pid_t   ktr_tid;/* thread id */
+   struct timespec ktr_time;   /* timestamp */
+   charktr_comm[MAXCOMLEN+1];  /* command name */
+   size_t  ktr_len;/* length of buf */
 };
 .Ed
 .Pp
Index: sys/nfssvc.2
===
RCS file: /cvs/src/lib/libc/sys/nfssvc.2,v
retrieving revision 1.23
diff -u -p -r1.23 nfssvc.2
--- sys/nfssvc.231 May 2015 23:54:25 -  1.23
+++ sys/nfssvc.226 Oct 2021 15:54:29 -
@@ -63,16 +63,16 @@ is called with the flag
 and a pointer to a structure:
 .Bd -literal
 struct nfsd_srvargs {
-struct nfsd *nsd_nfsd;   /* Pointer to in kernel nfsd struct */
-uid_t   nsd_uid;/* Effective uid mapped to cred */
-u_int32_t   nsd_haddr;  /* IP address of client */
-struct xucred   nsd_cr; /* Cred. uid maps to */
-int nsd_authlen;/* Length of auth string (ret) */
-u_char  *nsd_authstr;   /* Auth string (ret) */
-int nsd_verflen;/* and the verifier */
-u_char  *nsd_verfstr;
-struct timeval  nsd_timestamp;  /* timestamp from verifier */
-u_int32_t   nsd_ttl;/* credential ttl (sec) */
+struct nfsd*nsd_nfsd; /* Pointer to in kernel nfsd struct */
+uid_t  nsd_uid;   /* Effective uid mapped to cred */
+u_int32_t  nsd_haddr; /* IP address of client */
+struct xucred   nsd_cr;   /* Cred. uid maps to */
+intnsd_authlen;   /* Length of auth string (ret) */
+u_char*nsd_authstr;   /* Auth string (ret) */
+intnsd_verflen;   /* and the verifier */
+u_char*nsd_verfstr;
+struct timeval  nsd_timestamp; /* timestamp from verifier */
+u_int32_t  nsd_ttl;   /* credential ttl (sec) */
 };
 .Ed
 .Pp
@@ -87,9 +87,9 @@ with the flag
 and a pointer to a structure:
 .Bd -literal
 struct nfsd_args {
-int sock; /* Socket to serve */
-caddr_t name; 

Re: ixl(4): add rx/tx checksum offloading

2021-10-26 Thread Jan Klemkow
On Tue, Oct 26, 2021 at 05:17:55PM +1000, Jonathan Matthew wrote:
> First of all, thanks for looking at this, I forgot we hadn't done offloads
> for ixl(4) yet.

You're welcome.

> In the case of ixl(4), the driver has to tell the nic the length of each of 
> the
> packet headers, so it should also be tested with vlan interfaces.
> 
> I think ixl_tx_setup_offload() needs to account for outgoing vlan-tagged 
> packets.

Yes, it should.  I just want to keep this diff small for now.  I plan to
implement handling of vlan tags in a later diff.  The code just stops
processing the offload and returns, if the stack tries to send out a
vlan tagged ethernet frame in the switch-statement at the beginning.

So, with vlan tags we just don't offload checksumming at the moment.

I also tested this scenario.

> It currently assumes the ethernet header is ETHER_HDR_LEN bytes long, which 
> isn't
> always true.  See ixgbe_tx_ctx_setup() (sys/dev/pci/if_ix.c) for an example of
> a driver that takes this into account.

I already looked at this code and will adapt vlan tagging later, if this
is OK for you?

Thanks,
Jan

> > Index: dev/pci/if_ixl.c
> > ===
> > RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
> > retrieving revision 1.75
> > diff -u -p -r1.75 if_ixl.c
> > --- dev/pci/if_ixl.c23 Jul 2021 00:29:14 -  1.75
> > +++ dev/pci/if_ixl.c25 Oct 2021 15:11:46 -
> > @@ -82,6 +82,10 @@
> >  #endif
> >  
> >  #include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> >  #include 
> >  
> >  #include 
> > @@ -1388,6 +1392,7 @@ static intixl_rxeof(struct ixl_softc *,
> >  static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *);
> >  static voidixl_rxrefill(void *);
> >  static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *);
> > +static voidixl_rx_checksum(struct mbuf *, uint64_t);
> >  
> >  #if NKSTAT > 0
> >  static voidixl_kstat_attach(struct ixl_softc *);
> > @@ -1942,9 +1947,9 @@ ixl_attach(struct device *parent, struct
> > ifp->if_capabilities = IFCAP_VLAN_MTU;
> >  #if 0
> > ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
> > -   ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
> > -   IFCAP_CSUM_UDPv4;
> >  #endif
> > +   ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
> > +   IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
> >  
> > ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status);
> >  
> > @@ -2772,6 +2777,69 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
> >  }
> >  
> >  static void
> > +ixl_tx_setup_offload(struct mbuf *mp, uint64_t *cmd)
> > +{
> > +   uint64_t ip_hdr_len;
> > +   int  ipoff = ETHER_HDR_LEN;
> > +   uint8_t  ipproto;
> > +   struct ip   *ip;
> > +#ifdef INET6
> > +   struct ip6_hdr  *ip6;
> > +#endif
> > +   struct tcphdr   *th;
> > +   struct mbuf *m;
> > +
> > +   switch (ntohs(mtod(mp, struct ether_header *)->ether_type)) {
> > +   case ETHERTYPE_IP:
> > +   if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip))
> > +   return;
> > +   m = m_getptr(mp, ETHER_HDR_LEN, &ipoff);
> > +   KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip));
> > +   ip = (struct ip *)(m->m_data + ipoff);
> > +
> > +   if (mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> > +   *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4_CSUM;
> > +   else
> > +   *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4;
> > +
> > +   ip_hdr_len = ip->ip_hl << 2;
> > +   ipproto = ip->ip_p;
> > +   break;
> > +#ifdef INET6
> > +   case ETHERTYPE_IPV6:
> > +   if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip6))
> > +   return;
> > +   m = m_getptr(mp, ETHER_HDR_LEN, &ipoff);
> > +   KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip6));
> > +   ip6 = (struct ip6_hdr *)(m->m_data + ipoff);
> > +
> > +   *cmd |= IXL_TX_DESC_CMD_IIPT_IPV6;
> > +
> > +   ip_hdr_len = sizeof(*ip6);
> > +   ipproto = ip6->ip6_nxt;
> > +   break;
> > +#endif
> > +   default:
> > +   return;
> > +   }
> > +
> > +   *cmd |= (ETHER_HDR_LEN >> 1) << IXL_TX_DESC_MACLEN_SHIFT;
> > +   *cmd |= (ip_hdr_len >> 2) << IXL_TX_DESC_IPLEN_SHIFT;
> > +
> > +   if (ipproto == IPPROTO_TCP && m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
> > +   th = (struct tcphdr *)(m->m_data + ipoff + ip_hdr_len);
> > +
> > +   *cmd |= IXL_TX_DESC_CMD_L4T_EOFT_TCP;
> > +   *cmd |= (uint64_t)th->th_off << IXL_TX_DESC_L4LEN_SHIFT;
> > +   }
> > +
> > +   if (ipproto == IPPROTO_UDP && m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
> > +   *cmd |= IXL_TX_DESC_CMD_L4T_EOFT_UDP;
> > +   *cmd |= (sizeof(struct udphdr) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
> > +   }
> > +}
> > +
> > +static void
> >  ixl_start(struct ifqueue *ifq)
> >  {
> 

Re: ixl(4): add rx/tx checksum offloading

2021-10-25 Thread Jan Klemkow
On Fri, Oct 22, 2021 at 03:39:01PM +0200, Hrvoje Popovski wrote:
> On 22.10.2021. 13:39, Jan Klemkow wrote:
> > Thats because, you only see this flags, if the checksum offloading is
> > enabled for "sending".  I'm still working/debugging on the sending side.
> > Thus, I just send a diff with the receiving part for now.
> > 
> > You can see if its working for your card with the netstat(8) statistics.
> > 
> > # netstat -s | grep software-checksummed
> > 
> > These counters should not raise much on the receive side if you put some
> > traffic over the interface.
> 
> Thank you for explanation...
> 
> I'm sending 8 tcp streams with iperf3 from some box to openbsd ixl box
> and here are results:
> 
> without diff
> smc24# netstat -s | grep software-checksummed
> 5039250 input datagrams software-checksummed
> 2592718 output datagrams software-checksummed
> 2592709 packets software-checksummed
> 5039250 packets software-checksummed
> 0 input packets software-checksummed
> 0 output packets software-checksummed
> 
> cca 6.12 Gbits/sec
> 
> 
> 
> with diff
> smc24# netstat -s | grep software-checksummed
> 0 input datagrams software-checksummed
> 2956546 output datagrams software-checksummed
> 2956537 packets software-checksummed
> 0 packets software-checksummed
> 0 input packets software-checksummed
> 0 output packets software-checksummed
> 
> cca 6.70 Gbits/sec
> 
> are result like those expected?
> 
> is forwarding testing any good for checksum offload diffs?

Hi Hrvoje,

Thanks a lot for your big testing efforts!

In case of forwarding the forwarding box just checks the IPv4 header
checksum and ignores the UDP/TCP header.  Your setup from one Box to
another is fine.

Here is a new diff, which also includes send checksum offloading.
Thus, all software-checksummed numbers should stay low in both
directions.

Could you test this diff with your ospf{6}d and NFS tests?
If you see IPv4 fragments in the ospf and NFS traffic within tcpdump(8),
your test should find the bugs pointed out by deraadt@ and claudio@.

You can provoke large NFS packets with the following options on your NFS
mount point.

server:/export /mnt nfs ro,intr,-r65536,-w65536

Thanks,
Jan

Index: dev/pci/if_ixl.c
===
RCS file: /mount/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.75
diff -u -p -r1.75 if_ixl.c
--- dev/pci/if_ixl.c23 Jul 2021 00:29:14 -  1.75
+++ dev/pci/if_ixl.c25 Oct 2021 15:11:46 -
@@ -82,6 +82,10 @@
 #endif
 
 #include 
+#include 
+#include 
+#include 
+#include 
 #include 
 
 #include 
@@ -1388,6 +1392,7 @@ static intixl_rxeof(struct ixl_softc *,
 static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *);
 static voidixl_rxrefill(void *);
 static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *);
+static voidixl_rx_checksum(struct mbuf *, uint64_t);
 
 #if NKSTAT > 0
 static voidixl_kstat_attach(struct ixl_softc *);
@@ -1942,9 +1947,9 @@ ixl_attach(struct device *parent, struct
ifp->if_capabilities = IFCAP_VLAN_MTU;
 #if 0
ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
-   ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
-   IFCAP_CSUM_UDPv4;
 #endif
+   ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
+   IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
 
ifmedia_init(&sc->sc_media, 0, ixl_media_change, ixl_media_status);
 
@@ -2772,6 +2777,69 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 }
 
 static void
+ixl_tx_setup_offload(struct mbuf *mp, uint64_t *cmd)
+{
+   uint64_t ip_hdr_len;
+   int  ipoff = ETHER_HDR_LEN;
+   uint8_t  ipproto;
+   struct ip   *ip;
+#ifdef INET6
+   struct ip6_hdr  *ip6;
+#endif
+   struct tcphdr   *th;
+   struct mbuf *m;
+
+   switch (ntohs(mtod(mp, struct ether_header *)->ether_type)) {
+   case ETHERTYPE_IP:
+   if (mp->m_pkthdr.len < ETHER_HDR_LEN + sizeof(*ip))
+   return;
+   m = m_getptr(mp, ETHER_HDR_LEN, &ipoff);
+   KASSERT(m != NULL && m->m_len - ipoff >= sizeof(*ip));
+   ip = (struct ip *)(m->m_data + ipoff);
+
+   if (mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
+   *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4_CSUM;
+   else
+   *cmd |= IXL_TX_DESC_CMD_IIPT_IPV4;
+
+   ip_hdr_len = ip->ip_hl << 2;
+   ipproto = ip->ip_p;
+   break;
+#ifdef INET6
+   case ETHERTYPE_IPV6:
+ 

ixl(4): add checksum receive offloading

2021-10-22 Thread Jan Klemkow
Hi,

This diff adds hardware checksum offloading for the receive path of
ixl(4) interfaces.

Tested on:
ixl1 at pci3 dev 0 function 1 "Intel X710 SFP+" rev 0x02: port 1, FW 6.0.48442 
API 1.7, msix, 8 queues, address 40:a6:b7:02:38:3d

OK?

Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.75
diff -u -p -r1.75 if_ixl.c
--- dev/pci/if_ixl.c23 Jul 2021 00:29:14 -  1.75
+++ dev/pci/if_ixl.c22 Oct 2021 09:20:59 -
@@ -1388,6 +1388,7 @@ static intixl_rxeof(struct ixl_softc *,
 static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *);
 static voidixl_rxrefill(void *);
 static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *);
+static voidixl_rx_checksum(struct mbuf *, uint64_t);
 
 #if NKSTAT > 0
 static voidixl_kstat_attach(struct ixl_softc *);
@@ -3190,6 +3191,7 @@ ixl_rxeof(struct ixl_softc *sc, struct i
m->m_pkthdr.csum_flags |= M_FLOWID;
}
 
+   ixl_rx_checksum(m, word);
ml_enqueue(&ml, m);
} else {
ifp->if_ierrors++; /* XXX */
@@ -3320,6 +3322,23 @@ ixl_rxrinfo(struct ixl_softc *sc, struct
free(ifr, M_TEMP, ixl_nqueues(sc) * sizeof(*ifr));
 
return (rv);
+}
+
+static void
+ixl_rx_checksum(struct mbuf *m, uint64_t word)
+{
+   if (!ISSET(word, IXL_RX_DESC_L3L4P))
+   return;
+
+   if (ISSET(word, IXL_RX_DESC_IPE))
+   return;
+
+   m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
+
+   if (ISSET(word, IXL_RX_DESC_L4E))
+   return;
+
+   m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
 }
 
 static int



Re: ixl(4): add checksum receive offloading

2021-10-22 Thread Jan Klemkow
On Fri, Oct 22, 2021 at 12:01:41PM +0200, Hrvoje Popovski wrote:
> On 22.10.2021. 11:25, Jan Klemkow wrote:
> > this diff add hardware checksum offloading for the receive path of
> > ixl(4) interfaces.
> > 
> > Tested on:
> > ixl1 at pci3 dev 0 function 1 "Intel X710 SFP+" rev 0x02: port 1, FW 
> > 6.0.48442 API 1.7, msix, 8 queues, address 40:a6:b7:02:38:3d
> > 
> > OK?
> 
> I've applied this diff and i can't see anything regarding offload with
> ifconfig ixl hwfeatures?

Hi Hrvoje,

That's because you only see these flags if checksum offloading is
enabled for "sending".  I'm still working on and debugging the sending side.
Thus, I am just sending a diff with the receiving part for now.

You can see if it's working for your card with the netstat(8) statistics.

# netstat -s | grep software-checksummed

These counters should not rise much on the receive side if you put some
traffic over the interface.

Thanks for testing,
Jan

> smc24# ifconfig ixl0 hwfeatures
> ixl0: flags=8843 mtu 1500
> hwfeatures=10 hardmtu 9712
> lladdr 3c:fd:fe:04:0d:64
> index 7 priority 0 llprio 3
> media: Ethernet autoselect (10GSFP+Cu full-duplex)
> status: active
> inet 192.168.15.1 netmask 0xff00 broadcast 192.168.15.255
> 
> smc24# dmesg | grep ixl
> ixl0 at pci21 dev 0 function 0 "Intel X710 SFP+" rev 0x01: port 0, FW
> 8.2.64244 API 1.13, msix, 8 queues, address 3c:fd:fe:04:0d:64
> ixl1 at pci21 dev 0 function 1 "Intel X710 SFP+" rev 0x01: port 1, FW
> 8.2.64244 API 1.13, msix, 8 queues, address 3c:fd:fe:04:0d:66
> 
> > Index: dev/pci/if_ixl.c
> > ===
> > RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
> > retrieving revision 1.75
> > diff -u -p -r1.75 if_ixl.c
> > --- dev/pci/if_ixl.c23 Jul 2021 00:29:14 -  1.75
> > +++ dev/pci/if_ixl.c22 Oct 2021 09:20:59 -
> > @@ -1388,6 +1388,7 @@ static intixl_rxeof(struct ixl_softc *,
> >  static voidixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *);
> >  static voidixl_rxrefill(void *);
> >  static int ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *);
> > +static voidixl_rx_checksum(struct mbuf *, uint64_t);
> >  
> >  #if NKSTAT > 0
> >  static voidixl_kstat_attach(struct ixl_softc *);
> > @@ -3190,6 +3191,7 @@ ixl_rxeof(struct ixl_softc *sc, struct i
> > m->m_pkthdr.csum_flags |= M_FLOWID;
> > }
> >  
> > +   ixl_rx_checksum(m, word);
> > ml_enqueue(&ml, m);
> > } else {
> > ifp->if_ierrors++; /* XXX */
> > @@ -3320,6 +3322,23 @@ ixl_rxrinfo(struct ixl_softc *sc, struct
> > free(ifr, M_TEMP, ixl_nqueues(sc) * sizeof(*ifr));
> >  
> > return (rv);
> > +}
> > +
> > +static void
> > +ixl_rx_checksum(struct mbuf *m, uint64_t word)
> > +{
> > +   if (!ISSET(word, IXL_RX_DESC_L3L4P))
> > +   return;
> > +
> > +   if (ISSET(word, IXL_RX_DESC_IPE))
> > +   return;
> > +
> > +   m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
> > +
> > +   if (ISSET(word, IXL_RX_DESC_L4E))
> > +   return;
> > +
> > +   m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
> >  }
> >  
> >  static int



Re: relayd regress tcp performance

2021-09-18 Thread Jan Klemkow
On Thu, Apr 15, 2021 at 08:43:02PM +0200, Alexander Bluhm wrote:
> I found another regression with Jan's TCP diff that sends less ACK
> packets.  relayd run-args-http-slow-consumer.pl fails on i386 due
> to his commit.  This test writes a lot of data from the http server,
> but blocks receive for 2 seconds in the client.  Relayd between
> these machines should handle the delay.  The socket buffer size is
> very small to trigger the situation reliably.
> 
> The current TCP stack does not recover after the delay.  Packets
> are sent very slowly and the regress test runs in a timeout.  When
> I backout the change, the test passes quickly.
> 
> Ususally the test runs on localhost loopback.  There the problem
> is not triggered.  Only my i386 regress setup uses a remote machine.

This issue is caused by another bug in our stack.  The stack calls
tcp_output(), but does not send an ACK with a window update after the
consuming process empties the receive buffer in soreceive().

In normal conditions, the every-other-ACK feature hides this problem.
Thus, with my diff, the 200ms ACK timer is the only mechanism that sends
out ACKs.  But this is too slow to empty the stalled buffer fast
enough.

The following diff removes the every 2nd ACK feature again and ensures
that we send out an ACK if soreceive() empties the receive buffer.

We are so close to 7.0, that I would suggest to commit this after the
release.  Thus, we don't risk another last minute regression.

OK?

bye,
Jan

Index: netinet/tcp_input.c
===
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.370
diff -u -p -r1.370 tcp_input.c
--- netinet/tcp_input.c 9 Aug 2021 17:03:08 -   1.370
+++ netinet/tcp_input.c 18 Sep 2021 07:53:45 -
@@ -176,8 +176,7 @@ do { \
struct ifnet *ifp = NULL; \
if (m && (m->m_flags & M_PKTHDR)) \
ifp = if_get(m->m_pkthdr.ph_ifidx); \
-   if (TCP_TIMER_ISARMED(tp, TCPT_DELACK) || \
-   (tcp_ack_on_push && (tiflags) & TH_PUSH) || \
+   if ((tcp_ack_on_push && (tiflags) & TH_PUSH) || \
(ifp && (ifp->if_flags & IFF_LOOPBACK))) \
tp->t_flags |= TF_ACKNOW; \
else \
Index: netinet/tcp_usrreq.c
===
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.181
diff -u -p -r1.181 tcp_usrreq.c
--- netinet/tcp_usrreq.c30 Apr 2021 13:52:48 -  1.181
+++ netinet/tcp_usrreq.c18 Sep 2021 07:53:45 -
@@ -329,8 +329,15 @@ tcp_usrreq(struct socket *so, int req, s
 * template for a listening socket and hence the kernel
 * will panic.
 */
-   if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
+   if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) {
+   /*
+* If soreceive() empty the receive buffer, we have to
+* send a window update.
+*/
+   if (so->so_rcv.sb_cc == 0)
+   tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
+   }
break;
 
/*



enable cu(4) in amd64/GENERIC by default

2021-09-02 Thread Jan Klemkow
Hi,

The card and cables don't have the signaling lines that getty needs to use
it like com(4).  But with "local" in ttys(5) it works.  I have had this
driver in production use for about 5 years now.

OK?

bye,
Jan

Index: arch/amd64/conf/GENERIC
===
RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v
retrieving revision 1.499
diff -u -p -r1.499 GENERIC
--- arch/amd64/conf/GENERIC 20 Aug 2021 05:23:18 -  1.499
+++ arch/amd64/conf/GENERIC 2 Sep 2021 08:49:22 -
@@ -403,7 +403,7 @@ com*at pcmcia?  # PCMCIA 
modems/serial
 com*   at puc?
 
 # options CY_HW_RTS
-#cy*   at pci? # PCI cyclom serial card
+cy*at pci? # PCI cyclom serial card
 #cz*   at pci? # Cyclades-Z multi-port serial boards
 
 lpt0   at isa? port 0x378 irq 7# standard PC parallel ports



Re: vmx(4): remove useless code

2021-08-06 Thread Jan Klemkow
On Fri, Aug 06, 2021 at 12:06:04PM +0200, Patrick Wildt wrote:
> On Fri, Aug 06, 2021 at 11:05:53AM +0200, Patrick Wildt wrote:
> > Am Thu, Aug 05, 2021 at 02:33:01PM +0200 schrieb Jan Klemkow:
> > > Hi,
> > > 
> > > The following diff removes useless code from the driver.  As discussed
> > > here [1] and committed there [2], the hypervisor doesn't do anything
> > > with the data structures.  We even just set NULL to the pointer since
> > > the initial commit of vmx(4).  So, I guess it better to remove all of
> > > these.  The variables are bzero'd in vmxnet3_dma_allocmem() anyway.
> > > 
> > > OK?
> > 
> > My main concern was if the structs are getting zeroed correctly, but
> > they do, so that's fine.
> > 
> > That said, it looks like Linux sets the pointer to ~0ULL, not 0.  Should
> > we follow Linux' pattern there and do that as well?
> > 
> 
> Thinking about it a little more, I think we should do that as well.  And
> maybe explicitly set driver_data_len to 0 even though it's already zero.
> Basically for readability.

OK?

Index: dev/pci/if_vmx.c
===
RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v
retrieving revision 1.66
diff -u -p -r1.66 if_vmx.c
--- dev/pci/if_vmx.c23 Jul 2021 00:29:14 -  1.66
+++ dev/pci/if_vmx.c6 Aug 2021 12:28:51 -
@@ -157,7 +157,6 @@ struct vmxnet3_softc {
 #define WRITE_BAR1(sc, reg, val) \
bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val)
 #define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd)
-#define vtophys(va) 0  /* XXX ok? */
 
 int vmxnet3_match(struct device *, void *, void *);
 void vmxnet3_attach(struct device *, struct device *, void *);
@@ -468,8 +467,8 @@ vmxnet3_dma_init(struct vmxnet3_softc *s
ds->vmxnet3_revision = 1;
ds->upt_version = 1;
ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN;
-   ds->driver_data = vtophys(sc);
-   ds->driver_data_len = sizeof(struct vmxnet3_softc);
+   ds->driver_data = ~0ULL;
+   ds->driver_data_len = 0;
ds->queue_shared = qs_pa;
ds->queue_shared_len = qs_len;
ds->mtu = VMXNET3_MAX_MTU;
@@ -546,8 +545,8 @@ vmxnet3_alloc_txring(struct vmxnet3_soft
ts->cmd_ring_len = NTXDESC;
ts->comp_ring = comp_pa;
ts->comp_ring_len = NTXCOMPDESC;
-   ts->driver_data = vtophys(tq);
-   ts->driver_data_len = sizeof *tq;
+   ts->driver_data = ~0ULL;
+   ts->driver_data_len = 0;
ts->intr_idx = intr;
ts->stopped = 1;
ts->error = 0;
@@ -598,8 +597,8 @@ vmxnet3_alloc_rxring(struct vmxnet3_soft
rs->cmd_ring_len[1] = NRXDESC;
rs->comp_ring = comp_pa;
rs->comp_ring_len = NRXCOMPDESC;
-   rs->driver_data = vtophys(rq);
-   rs->driver_data_len = sizeof *rq;
+   rs->driver_data = ~0ULL;
+   rs->driver_data_len = 0;
rs->intr_idx = intr;
rs->stopped = 1;
rs->error = 0;



vmx(4): remove useless code

2021-08-05 Thread Jan Klemkow
Hi,

The following diff removes useless code from the driver.  As discussed
here [1] and committed there [2], the hypervisor doesn't do anything
with the data structures.  We even just set NULL to the pointer since
the initial commit of vmx(4).  So, I guess it is better to remove all of
these.  The variables are bzero'd in vmxnet3_dma_allocmem() anyway.

OK?

bye,
Jan

[1]: https://www.lkml.org/lkml/2021/1/19/1225
[2]: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/net/vmxnet3/vmxnet3_drv.c?id=de1da8bcf40564a2adada2d5d5426e05355f66e8

Index: dev/pci/if_vmx.c
===
RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v
retrieving revision 1.66
diff -u -p -r1.66 if_vmx.c
--- dev/pci/if_vmx.c23 Jul 2021 00:29:14 -  1.66
+++ dev/pci/if_vmx.c5 Aug 2021 11:12:26 -
@@ -157,7 +157,6 @@ struct vmxnet3_softc {
 #define WRITE_BAR1(sc, reg, val) \
bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val)
 #define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd)
-#define vtophys(va) 0  /* XXX ok? */
 
 int vmxnet3_match(struct device *, void *, void *);
 void vmxnet3_attach(struct device *, struct device *, void *);
@@ -468,8 +467,6 @@ vmxnet3_dma_init(struct vmxnet3_softc *s
ds->vmxnet3_revision = 1;
ds->upt_version = 1;
ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN;
-   ds->driver_data = vtophys(sc);
-   ds->driver_data_len = sizeof(struct vmxnet3_softc);
ds->queue_shared = qs_pa;
ds->queue_shared_len = qs_len;
ds->mtu = VMXNET3_MAX_MTU;
@@ -546,8 +543,6 @@ vmxnet3_alloc_txring(struct vmxnet3_soft
ts->cmd_ring_len = NTXDESC;
ts->comp_ring = comp_pa;
ts->comp_ring_len = NTXCOMPDESC;
-   ts->driver_data = vtophys(tq);
-   ts->driver_data_len = sizeof *tq;
ts->intr_idx = intr;
ts->stopped = 1;
ts->error = 0;
@@ -598,8 +593,6 @@ vmxnet3_alloc_rxring(struct vmxnet3_soft
rs->cmd_ring_len[1] = NRXDESC;
rs->comp_ring = comp_pa;
rs->comp_ring_len = NRXCOMPDESC;
-   rs->driver_data = vtophys(rq);
-   rs->driver_data_len = sizeof *rq;
rs->intr_idx = intr;
rs->stopped = 1;
rs->error = 0;



Fix: tcp_output window calculation error

2021-07-22 Thread Jan Klemkow
Hi,

This calculation of the receive window has a logic error:

If win is 0, it will be overwritten by (rcv_adv - rcv_nxt).  Thus, win
will be (rcv_adv - rcv_nxt) even if it's below (sb_hiwat / 4).

We could just remove the dead (sb_hiwat / 4) code, or reorder the
conditions to keep the original feature.

OK?

bye,
Jan

Index: netinet/tcp_output.c
===
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.130
diff -u -p -r1.130 tcp_output.c
--- netinet/tcp_output.c8 Feb 2021 19:37:15 -   1.130
+++ netinet/tcp_output.c22 Jul 2021 12:33:13 -
@@ -812,12 +812,12 @@ send:
 * Calculate receive window.  Don't shrink window,
 * but avoid silly window syndrome.
 */
-   if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
-   win = 0;
if (win > (long)TCP_MAXWIN << tp->rcv_scale)
win = (long)TCP_MAXWIN << tp->rcv_scale;
if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt))
win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt);
+   if (win < (long)(so->so_rcv.sb_hiwat / 4) && win < (long)tp->t_maxseg)
+   win = 0;
if (flags & TH_RST)
win = 0;
th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));



ftpd(8): Convert K&R function definitions to modern C

2021-05-30 Thread Jan Klemkow
Hi,

Convert K&R function definitions to modern C.

OK?

bye,
Jan

Index: ftpcmd.y
===
RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
retrieving revision 1.72
diff -u -p -r1.72 ftpcmd.y
--- ftpcmd.y23 May 2021 17:01:21 -  1.72
+++ ftpcmd.y30 May 2021 15:32:50 -
@@ -1072,9 +1072,7 @@ static int yylex(void);
 extern int epsvall;
 
 static struct tab *
-lookup(p, cmd)
-   struct tab *p;
-   const char *cmd;
+lookup(struct tab *p, const char *cmd)
 {
 
for (; p->name != NULL; p++)
@@ -1089,9 +1087,7 @@ lookup(p, cmd)
  * get_line - a hacked up version of fgets to ignore TELNET escape codes.
  */
 int
-get_line(s, n)
-   char *s;
-   int n;
+get_line(char *s, int n)
 {
int c;
char *cs;
@@ -1176,8 +1172,7 @@ get_line(s, n)
 
 /*ARGSUSED*/
 void
-toolong(signo)
-   int signo;
+toolong(int signo)
 {
struct syslog_data sdata = SYSLOG_DATA_INIT;
 
@@ -1190,7 +1185,7 @@ toolong(signo)
 }
 
 static int
-yylex()
+yylex(void)
 {
static int cpos;
char *cp, *cp2;
@@ -1429,8 +1424,7 @@ yylex()
 }
 
 void
-upper(s)
-   char *s;
+upper(char *s)
 {
char *p;
 
@@ -1439,9 +1433,7 @@ upper(s)
 }
 
 static void
-help(ctab, s)
-   struct tab *ctab;
-   char *s;
+help(struct tab *ctab, char *s)
 {
struct tab *c;
int width, NCMDS;
@@ -1504,8 +1496,7 @@ help(ctab, s)
 }
 
 static void
-sizecmd(filename)
-   const char *filename;
+sizecmd(const char *filename)
 {
switch (type) {
case TYPE_L:
Index: monitor.c
===
RCS file: /cvs/src/libexec/ftpd/monitor.c,v
retrieving revision 1.28
diff -u -p -r1.28 monitor.c
--- monitor.c   20 May 2021 15:21:03 -  1.28
+++ monitor.c   30 May 2021 15:38:52 -
@@ -206,7 +206,7 @@ monitor_init(void)
  * for the user-privileged slave process and 1 for the monitor process.
  */
 int
-monitor_post_auth()
+monitor_post_auth(void)
 {
slave_pid = fork();
if (slave_pid == -1)



Re: ftpd(8): constify internal functions

2021-05-21 Thread Jan Klemkow
ping?

On Thu, May 13, 2021 at 04:44:56PM +0200, Jan Klemkow wrote:
> ping?
> 
> On Tue, May 04, 2021 at 10:50:50AM +0200, Jan Klemkow wrote:
> > Hi,
> > 
> > The following diff adds some missing consts for char * to the internal
> > program functions.
> > 
> > OK?
> > 
> > bye,
> > Jan
> > 
> > Index: extern.h
> > ===
> > RCS file: /cvs/src/libexec/ftpd/extern.h,v
> > retrieving revision 1.21
> > diff -u -p -r1.21 extern.h
> > --- extern.h15 Jan 2020 22:06:59 -  1.21
> > +++ extern.h4 May 2021 08:34:14 -
> > @@ -64,38 +64,38 @@
> >  void   blkfree(char **);
> >  char  **copyblk(char **);
> >  void   cwd(char *);
> > -void   delete(char *);
> > +void   delete(const char *);
> >  void   dologout(int);
> > -void   fatal(char *);
> > +void   fatal(const char *);
> >  intftpd_pclose(FILE *, pid_t);
> >  FILE   *ftpd_ls(const char *, pid_t *);
> >  int get_line(char *, int, FILE *);
> > -void   ftpdlogwtmp(char *, char *, char *);
> > +void   ftpdlogwtmp(const char *, const char *, const char *);
> >  void   lreply(int, const char *, ...);
> > -void   makedir(char *);
> > -void   nack(char *);
> > +void   makedir(const char *);
> > +void   nack(const char *);
> >  enum auth_ret
> > pass(char *);
> >  void   passive(void);
> >  intepsvproto2af(int);
> >  intaf2epsvproto(int);
> > -void   long_passive(char *, int);
> > +void   long_passive(const char *, int);
> >  intextended_port(const char *);
> >  void   epsv_protounsupp(const char *);
> > -void   perror_reply(int, char *);
> > +void   perror_reply(int, const char *);
> >  void   pwd(void);
> > -void   removedir(char *);
> > -void   renamecmd(char *, char *);
> > +void   removedir(const char *);
> > +void   renamecmd(const char *, const char *);
> >  char   *renamefrom(char *);
> >  void   reply(int, const char *, ...);
> >  void   reply_r(int, const char *, ...);
> >  enum ret_cmd { RET_FILE, RET_LIST };
> > -void   retrieve(enum ret_cmd, char *);
> > +void   retrieve(enum ret_cmd, const char *);
> >  void   send_file_list(char *);
> >  void   setproctitle(const char *, ...);
> >  void   statcmd(void);
> > -void   statfilecmd(char *);
> > -void   store(char *, char *, int);
> > +void   statfilecmd(const char *);
> > +void   store(const char *, const char *, int);
> >  void   upper(char *);
> >  void   user(char *);
> >  void   yyerror(char *);
> > Index: ftpcmd.y
> > ===
> > RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
> > retrieving revision 1.69
> > diff -u -p -r1.69 ftpcmd.y
> > --- ftpcmd.y4 Mar 2020 20:17:48 -   1.69
> > +++ ftpcmd.y4 May 2021 08:34:14 -
> > @@ -1065,8 +1065,8 @@ struct tab sitetab[] = {
> >  
> >  static void help(struct tab *, char *);
> >  static struct tab *
> > -lookup(struct tab *, char *);
> > -static void sizecmd(char *);
> > +lookup(struct tab *, const char *);
> > +static void sizecmd(const char *);
> >  static int  yylex(void);
> >  
> >  extern int epsvall;
> > @@ -1074,7 +1074,7 @@ extern int epsvall;
> >  static struct tab *
> >  lookup(p, cmd)
> > struct tab *p;
> > -   char *cmd;
> > +   const char *cmd;
> >  {
> >  
> > for (; p->name != NULL; p++)
> > @@ -1508,7 +1506,7 @@ help(ctab, s)
> >  
> >  static void
> >  sizecmd(filename)
> > -   char *filename;
> > +   const char *filename;
> >  {
> > switch (type) {
> > case TYPE_L:
> > Index: ftpd.c
> > ===
> > RCS file: /cvs/src/libexec/ftpd/ftpd.c,v
> > retrieving revision 1.229
> > diff -u -p -r1.229 ftpd.c
> > --- ftpd.c  15 Jan 2020 22:06:59 -  1.229
> > +++ ftpd.c  4 May 2021 08:34:14 -
> > @@ -191,28 +191,28 @@ char  proctitle[BUFSIZ];  /* initial part 
> > (long long)(cnt)); \
> > }
> >  
> > -static void ack(char *);
> > +static void ack(const char *);
> >  static void sigurg(int);
> > 

Re: ftpd(8): remove double fflush(3) calls

2021-05-21 Thread Jan Klemkow
ping?

On Thu, May 13, 2021 at 04:44:23PM +0200, Jan Klemkow wrote:
> ping?
> 
> On Wed, May 05, 2021 at 04:42:49PM +0200, Jan Klemkow wrote:
> > Hi,
> > 
> > The function lreply() already calls fflush(3) on stdout.  So, this calls
> > are useless.
> > 
> > OK?
> > 
> > bye,
> > Jan
> > 
> > Index: ftpd.c
> > ===
> > RCS file: /cvs/src/libexec/ftpd/ftpd.c,v
> > retrieving revision 1.229
> > diff -u -p -r1.229 ftpd.c
> > --- ftpd.c  15 Jan 2020 22:06:59 -  1.229
> > +++ ftpd.c  5 May 2021 14:39:25 -
> > @@ -568,7 +568,6 @@ main(int argc, char *argv[])
> > line[strcspn(line, "\n")] = '\0';
> > lreply(530, "%s", line);
> > }
> > -   (void) fflush(stdout);
> > (void) fclose(fp);
> > reply(530, "System not available.");
> > exit(0);
> > @@ -578,7 +577,6 @@ main(int argc, char *argv[])
> > line[strcspn(line, "\n")] = '\0';
> > lreply(220, "%s", line);
> > }
> > -   (void) fflush(stdout);
> > (void) fclose(fp);
> > /* reply(220,) must follow */
> > }
> > @@ -1078,7 +1076,6 @@ pass(char *passwd)
> > line[strcspn(line, "\n")] = '\0';
> > lreply(230, "%s", line);
> > }
> > -   (void) fflush(stdout);
> > (void) fclose(fp);
> > }
> > free(motd);
> > @@ -2029,7 +2026,6 @@ cwd(char *path)
> > line[strcspn(line, "\n")] = '\0';
> > lreply(250, "%s", line);
> > }
> > -   (void) fflush(stdout);
> > (void) fclose(message);
> > }
> > ack("CWD");
> > 
> 



Re: ftpd(8): remove useless islower(3) in upper()

2021-05-21 Thread Jan Klemkow
ping?

On Thu, May 13, 2021 at 04:45:14PM +0200, Jan Klemkow wrote:
> ping?
> 
> On Sat, May 01, 2021 at 11:19:56AM +0200, Jan Klemkow wrote:
> > Hi,
> > 
> > This cleanup diff, removes a useless if islower(3) from the loop.  It is
> > guarantee by toupper(3) that no character will be changed if its not a
> > lower one.
> > 
> > man toupper(3):
> >  The toupper() and toupper_l() functions convert a lower-case letter
> >  to the corresponding upper-case letter.  The _toupper() function is
> >  identical to toupper() except that c must be a lower-case letter.
> > 
> > POSIX:
> >  ...
> >  All other arguments in the domain are returned unchanged.
> >  ...
> > 
> > OK?
> > 
> > bye,
> > Jan
> > 
> > Index: ftpcmd.y
> > ===
> > RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
> > retrieving revision 1.69
> > diff -u -p -r1.69 ftpcmd.y
> > --- ftpcmd.y4 Mar 2020 20:17:48 -   1.69
> > +++ ftpcmd.y1 May 2021 09:09:46 -
> > @@ -1435,10 +1435,8 @@ upper(s)
> >  {
> > char *p;
> >  
> > -   for (p = s; *p; p++) {
> > -   if (islower((unsigned char)*p))
> > -   *p = (char)toupper((unsigned char)*p);
> > -   }
> > +   for (p = s; *p; p++)
> > +   *p = (char)toupper((unsigned char)*p);
> >  }
> >  
> >  static void
> > 



Re: snmpd rename context to pdutype

2021-05-18 Thread Jan Klemkow
On Fri, May 07, 2021 at 04:18:50PM +0200, Martijn van Duren wrote:
> When moving the traphandler to the snmpe process I overlooked the fact
> that "type" is being saved inside the switch statement under the
> sm_context name. RFC3411 talks about pduType, and the name context means
> something completely different in the v3 world.
> 
> The diff below moves our naming closer to the RFCs, which should
> hopefully prevent further confusion in the future.
> 
> While here I made the debug output print the pduType in a human readable
> format.
> 
> The invalid varbind check can be simplified a simple "{}" in the
> ober_scanf_elements allowing me to just drop the type variable.
> 
> OK?

I tested it and the diff looks good and legit to me.
 
> martijn@
> 
> Index: snmp.h
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/snmp.h,v
> retrieving revision 1.16
> diff -u -p -r1.16 snmp.h
> --- snmp.h30 Jun 2020 17:11:49 -  1.16
> +++ snmp.h7 May 2021 14:17:12 -
> @@ -77,7 +77,7 @@ enum snmp_version {
>   SNMP_V3 = 3
>  };
>  
> -enum snmp_context {
> +enum snmp_pdutype {
>   SNMP_C_GETREQ   = 0,
>   SNMP_C_GETNEXTREQ   = 1,
>   SNMP_C_GETRESP  = 2,
> Index: snmpd.h
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/snmpd.h,v
> retrieving revision 1.94
> diff -u -p -r1.94 snmpd.h
> --- snmpd.h   5 Feb 2021 10:30:45 -   1.94
> +++ snmpd.h   7 May 2021 14:17:12 -
> @@ -384,7 +384,7 @@ struct snmp_message {
>   socklen_tsm_slen;
>   int  sm_sock_tcp;
>   int  sm_aflags;
> - int  sm_type;
> + enum snmp_pdutypesm_pdutype;
>   struct event sm_sockev;
>   char sm_host[HOST_NAME_MAX+1];
>   in_port_tsm_port;
> @@ -405,7 +405,6 @@ struct snmp_message {
>  
>   /* V1, V2c */
>   char sm_community[SNMPD_MAXCOMMUNITYLEN];
> - int  sm_context;
>  
>   /* V3 */
>   long longsm_msgid;
> Index: snmpe.c
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/snmpe.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 snmpe.c
> --- snmpe.c   22 Feb 2021 11:31:09 -  1.70
> +++ snmpe.c   7 May 2021 14:17:12 -
> @@ -41,6 +41,7 @@
>  #include "mib.h"
>  
>  void  snmpe_init(struct privsep *, struct privsep_proc *, void *);
> +const char *snmpe_pdutype2string(enum snmp_pdutype);
>  int   snmpe_parse(struct snmp_message *);
>  void  snmpe_tryparse(int, struct snmp_message *);
>  int   snmpe_parsevarbinds(struct snmp_message *);
> @@ -194,6 +195,36 @@ snmpe_bind(struct address *addr)
>   return (-1);
>  }
>  
> +const char *
> +snmpe_pdutype2string(enum snmp_pdutype pdutype)
> +{
> + static char unknown[sizeof("Unknown (4294967295)")];
> +
> + switch (pdutype) {
> + case SNMP_C_GETREQ:
> + return "GetRequest";
> + case SNMP_C_GETNEXTREQ:
> + return "GetNextRequest";
> + case SNMP_C_GETRESP:
> + return "Response";
> + case SNMP_C_SETREQ:
> + return "SetRequest";
> + case SNMP_C_TRAP:
> + return "Trap";
> + case SNMP_C_GETBULKREQ:
> + return "GetBulkRequest";
> + case SNMP_C_INFORMREQ:
> + return "InformRequest";
> + case SNMP_C_TRAPV2:
> + return "SNMPv2-Trap";
> + case SNMP_C_REPORT:
> + return "Report";
> + }
> +
> + snprintf(unknown, sizeof(unknown), "Unknown (%u)", pdutype);
> + return unknown;
> +}
> +
>  int
>  snmpe_parse(struct snmp_message *msg)
>  {
> @@ -202,7 +233,6 @@ snmpe_parse(struct snmp_message *msg)
>   struct ber_element  *a;
>   long longver, req;
>   long longerrval, erridx;
> - unsigned int type;
>   u_intclass;
>   char*comn;
>   char*flagstr, *ctxname;
> @@ -271,15 +301,15 @@ snmpe_parse(struct snmp_message *msg)
>   goto fail;
>   }
>  
> - if (ober_scanf_elements(msg->sm_pdu, "t{e", &class, &type, &a) != 0)
> + if (ober_scanf_elements(msg->sm_pdu, "t{e", &class, &(msg->sm_pdutype),
> + &a) != 0)
>   goto parsefail;
>  
>   /* SNMP PDU context */
>   if (class != BER_CLASS_CONTEXT)
>   goto parsefail;
>  
> - msg->sm_type = type;
> - switch (type) {
> + switch (msg->sm_pdutype) {
>   case SNMP_C_GETBULKREQ:
>   if (msg->sm_version == SNMP_V1) {
>   stats->snmp_inbadversions++;
> @@ -294,7 +324,7 @@ snmpe_parse(struct snmp_message *msg)
>   /* FALLTHROUGH */
>  
>   case SNMP_C_GETNEXTREQ:
> - if (typ

ftpd(8): remove useless parameter of get_line()

2021-05-16 Thread Jan Klemkow
Hi,

This diff removes the useless FILE* parameter of get_line().  In every
call this parameter is always "stdin".  Thus, we can replace every use of
the variable iop with stdin.

Like every other diff, I tested this diff with the ftpd regression
tests.

OK?

bye,
Jan

Index: extern.h
===
RCS file: /cvs/src/libexec/ftpd/extern.h,v
retrieving revision 1.21
diff -u -p -r1.21 extern.h
--- extern.h15 Jan 2020 22:06:59 -  1.21
+++ extern.h16 May 2021 15:36:27 -
@@ -69,7 +69,7 @@ void  dologout(int);
 void   fatal(char *);
 intftpd_pclose(FILE *, pid_t);
 FILE   *ftpd_ls(const char *, pid_t *);
-int get_line(char *, int, FILE *);
+int get_line(char *, int);
 void   ftpdlogwtmp(char *, char *, char *);
 void   lreply(int, const char *, ...);
 void   makedir(char *);
Index: ftpcmd.y
===
RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
retrieving revision 1.69
diff -u -p -r1.69 ftpcmd.y
--- ftpcmd.y4 Mar 2020 20:17:48 -   1.69
+++ ftpcmd.y16 May 2021 15:38:07 -
@@ -1089,10 +1089,9 @@ lookup(p, cmd)
  * get_line - a hacked up version of fgets to ignore TELNET escape codes.
  */
 int
-get_line(s, n, iop)
+get_line(s, n)
char *s;
int n;
-   FILE *iop;
 {
int c;
char *cs;
@@ -,21 +1110,21 @@ get_line(s, n, iop)
if (c == 0)
tmpline[0] = '\0';
}
-   while ((c = getc(iop)) != EOF) {
+   while ((c = getc(stdin)) != EOF) {
c &= 0377;
if (c == IAC) {
-   if ((c = getc(iop)) != EOF) {
+   if ((c = getc(stdin)) != EOF) {
c &= 0377;
switch (c) {
case WILL:
case WONT:
-   c = getc(iop);
+   c = getc(stdin);
printf("%c%c%c", IAC, DONT, 0377&c);
(void) fflush(stdout);
continue;
case DO:
case DONT:
-   c = getc(iop);
+   c = getc(stdin);
printf("%c%c%c", IAC, WONT, 0377&c);
(void) fflush(stdout);
continue;
@@ -1144,7 +1143,7 @@ get_line(s, n, iop)
 * This prevents the command to be split up into
 * multiple commands.
 */
-   while (c != '\n' && (c = getc(iop)) != EOF)
+   while (c != '\n' && (c = getc(stdin)) != EOF)
;
return (-2);
}
@@ -1204,7 +1203,7 @@ yylex()
 
case CMD:
(void) alarm((unsigned) timeout);
-   n = get_line(cbuf, sizeof(cbuf)-1, stdin);
+   n = get_line(cbuf, sizeof(cbuf)-1);
if (n == -1) {
reply(221, "You could at least say goodbye.");
dologout(0);
Index: ftpd.c
===
RCS file: /cvs/src/libexec/ftpd/ftpd.c,v
retrieving revision 1.229
diff -u -p -r1.229 ftpd.c
--- ftpd.c  15 Jan 2020 22:06:59 -  1.229
+++ ftpd.c  16 May 2021 15:44:17 -
@@ -2179,7 +2179,7 @@ myoob(void)
if (!transflag)
return;
cp = tmpline;
-   ret = get_line(cp, sizeof(tmpline)-1, stdin);
+   ret = get_line(cp, sizeof(tmpline)-1);
if (ret == -1) {
reply(221, "You could at least say goodbye.");
dologout(0);



Re: ftpd(8): add pledge(2)

2021-05-13 Thread Jan Klemkow
On Thu, May 13, 2021 at 10:40:40AM -0600, Theo de Raadt wrote:
> +   if (pledge("stdio rpath inet recvfd sendfd "
> +   "wpath cpath proc tty getpw", NULL) == -1)
> 
> Please change the order:
> 
> stdio rpath wpath cpath inet recvfd sendfd proc tty getpw
> 
> (It remains extremely permissive).

Yes.  Further refactoring may reduce the needed syscalls in the future?

OK?

Thanks,
Jan

Index: monitor.c
===
RCS file: /cvs/src/libexec/ftpd/monitor.c,v
retrieving revision 1.26
diff -u -p -r1.26 monitor.c
--- monitor.c   28 Jun 2019 13:32:53 -  1.26
+++ monitor.c   13 May 2021 17:12:18 -
@@ -295,11 +295,17 @@ handle_cmds(void)
sizeof(slavequit));
break;
case AUTH_SLAVE:
+   if (pledge("stdio rpath wpath cpath inet recvfd"
+  " sendfd proc tty getpw", NULL) == -1)
+   fatalx("pledge");
/* User-privileged slave */
debugmsg("user-privileged slave started");
return;
/* NOTREACHED */
case AUTH_MONITOR:
+   if (pledge("stdio inet sendfd recvfd proc",
+   NULL) == -1)
+   fatalx("pledge");
/* Post-auth monitor */
debugmsg("monitor went into post-auth phase");
state = POSTAUTH;



Re: ftpd(8): constify internal functions

2021-05-13 Thread Jan Klemkow
ping?

On Tue, May 04, 2021 at 10:50:50AM +0200, Jan Klemkow wrote:
> Hi,
> 
> The following diff adds some missing consts for char * to the internal
> program functions.
> 
> OK?
> 
> bye,
> Jan
> 
> Index: extern.h
> ===
> RCS file: /cvs/src/libexec/ftpd/extern.h,v
> retrieving revision 1.21
> diff -u -p -r1.21 extern.h
> --- extern.h  15 Jan 2020 22:06:59 -  1.21
> +++ extern.h  4 May 2021 08:34:14 -
> @@ -64,38 +64,38 @@
>  void blkfree(char **);
>  char  **copyblk(char **);
>  void cwd(char *);
> -void delete(char *);
> +void delete(const char *);
>  void dologout(int);
> -void fatal(char *);
> +void fatal(const char *);
>  int  ftpd_pclose(FILE *, pid_t);
>  FILE   *ftpd_ls(const char *, pid_t *);
>  int get_line(char *, int, FILE *);
> -void ftpdlogwtmp(char *, char *, char *);
> +void ftpdlogwtmp(const char *, const char *, const char *);
>  void lreply(int, const char *, ...);
> -void makedir(char *);
> -void nack(char *);
> +void makedir(const char *);
> +void nack(const char *);
>  enum auth_ret
>   pass(char *);
>  void passive(void);
>  int  epsvproto2af(int);
>  int  af2epsvproto(int);
> -void long_passive(char *, int);
> +void long_passive(const char *, int);
>  int  extended_port(const char *);
>  void epsv_protounsupp(const char *);
> -void perror_reply(int, char *);
> +void perror_reply(int, const char *);
>  void pwd(void);
> -void removedir(char *);
> -void renamecmd(char *, char *);
> +void removedir(const char *);
> +void renamecmd(const char *, const char *);
>  char   *renamefrom(char *);
>  void reply(int, const char *, ...);
>  void reply_r(int, const char *, ...);
>  enum ret_cmd { RET_FILE, RET_LIST };
> -void retrieve(enum ret_cmd, char *);
> +void retrieve(enum ret_cmd, const char *);
>  void send_file_list(char *);
>  void setproctitle(const char *, ...);
>  void statcmd(void);
> -void statfilecmd(char *);
> -void store(char *, char *, int);
> +void statfilecmd(const char *);
> +void store(const char *, const char *, int);
>  void upper(char *);
>  void user(char *);
>  void yyerror(char *);
> Index: ftpcmd.y
> ===
> RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
> retrieving revision 1.69
> diff -u -p -r1.69 ftpcmd.y
> --- ftpcmd.y  4 Mar 2020 20:17:48 -   1.69
> +++ ftpcmd.y  4 May 2021 08:34:14 -
> @@ -1065,8 +1065,8 @@ struct tab sitetab[] = {
>  
>  static void   help(struct tab *, char *);
>  static struct tab *
> -  lookup(struct tab *, char *);
> -static void   sizecmd(char *);
> +  lookup(struct tab *, const char *);
> +static void   sizecmd(const char *);
>  static intyylex(void);
>  
>  extern int epsvall;
> @@ -1074,7 +1074,7 @@ extern int epsvall;
>  static struct tab *
>  lookup(p, cmd)
>   struct tab *p;
> - char *cmd;
> + const char *cmd;
>  {
>  
>   for (; p->name != NULL; p++)
> @@ -1508,7 +1506,7 @@ help(ctab, s)
>  
>  static void
>  sizecmd(filename)
> - char *filename;
> + const char *filename;
>  {
>   switch (type) {
>   case TYPE_L:
> Index: ftpd.c
> ===
> RCS file: /cvs/src/libexec/ftpd/ftpd.c,v
> retrieving revision 1.229
> diff -u -p -r1.229 ftpd.c
> --- ftpd.c15 Jan 2020 22:06:59 -  1.229
> +++ ftpd.c4 May 2021 08:34:14 -
> @@ -191,28 +191,28 @@ charproctitle[BUFSIZ];  /* initial part 
>   (long long)(cnt)); \
>   }
>  
> -static void   ack(char *);
> +static void   ack(const char *);
>  static void   sigurg(int);
>  static void   myoob(void);
> -static intcheckuser(char *, char *);
> -static FILE  *dataconn(char *, off_t, char *);
> +static intcheckuser(char *, const char *);
> +static FILE  *dataconn(const char *, off_t, char *);
>  static void   dolog(struct sockaddr *);
>  static char  *copy_dir(char *, struct passwd *);
>  static char  *curdir(void);
>  static void   end_login(void);
>  static FILE  *getdatasock(char *);
> -static intguniquefd(char *, char **);
> +static intguniquefd(const char *, char **);
>  static void   lostconn(int);
>  static void   sigquit(int);
>  static intreceive_data(FILE *, FILE *);
>  static void   replydirname(const char *, const char *);
>  static intsend_data(FILE *, FILE *, off_t, off_t, int);
>  static struct passwd *
> -  sgetpwnam(char *, struct passwd *);
> +  sgetpwnam(const char *, struct passwd *);
>  static void 

Re: ftpd(8): remove useless islower(3) in upper()

2021-05-13 Thread Jan Klemkow
ping?

On Sat, May 01, 2021 at 11:19:56AM +0200, Jan Klemkow wrote:
> Hi,
> 
> This cleanup diff removes a useless islower(3) check from the loop.  It is
> guaranteed by toupper(3) that no character will be changed if it is not a
> lower-case one.
> 
> man toupper(3):
>  The toupper() and toupper_l() functions convert a lower-case letter
>  to the corresponding upper-case letter.  The _toupper() function is
>  identical to toupper() except that c must be a lower-case letter.
> 
> POSIX:
>  ...
>  All other arguments in the domain are returned unchanged.
>  ...
> 
> OK?
> 
> bye,
> Jan
> 
> Index: ftpcmd.y
> ===
> RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
> retrieving revision 1.69
> diff -u -p -r1.69 ftpcmd.y
> --- ftpcmd.y  4 Mar 2020 20:17:48 -   1.69
> +++ ftpcmd.y  1 May 2021 09:09:46 -
> @@ -1435,10 +1435,8 @@ upper(s)
>  {
>   char *p;
>  
> - for (p = s; *p; p++) {
> - if (islower((unsigned char)*p))
> - *p = (char)toupper((unsigned char)*p);
> - }
> + for (p = s; *p; p++)
> + *p = (char)toupper((unsigned char)*p);
>  }
>  
>  static void
> 



Re: ftpd(8): remove double fflush(3) calls

2021-05-13 Thread Jan Klemkow
ping?

On Wed, May 05, 2021 at 04:42:49PM +0200, Jan Klemkow wrote:
> Hi,
> 
> The function lreply() already calls fflush(3) on stdout.  So, these calls
> are useless.
> 
> OK?
> 
> bye,
> Jan
> 
> Index: ftpd.c
> ===
> RCS file: /cvs/src/libexec/ftpd/ftpd.c,v
> retrieving revision 1.229
> diff -u -p -r1.229 ftpd.c
> --- ftpd.c15 Jan 2020 22:06:59 -  1.229
> +++ ftpd.c5 May 2021 14:39:25 -
> @@ -568,7 +568,6 @@ main(int argc, char *argv[])
>   line[strcspn(line, "\n")] = '\0';
>   lreply(530, "%s", line);
>   }
> - (void) fflush(stdout);
>   (void) fclose(fp);
>   reply(530, "System not available.");
>   exit(0);
> @@ -578,7 +577,6 @@ main(int argc, char *argv[])
>   line[strcspn(line, "\n")] = '\0';
>   lreply(220, "%s", line);
>   }
> - (void) fflush(stdout);
>   (void) fclose(fp);
>   /* reply(220,) must follow */
>   }
> @@ -1078,7 +1076,6 @@ pass(char *passwd)
>   line[strcspn(line, "\n")] = '\0';
>   lreply(230, "%s", line);
>   }
> - (void) fflush(stdout);
>   (void) fclose(fp);
>   }
>   free(motd);
> @@ -2029,7 +2026,6 @@ cwd(char *path)
>   line[strcspn(line, "\n")] = '\0';
>   lreply(250, "%s", line);
>   }
> - (void) fflush(stdout);
>   (void) fclose(message);
>   }
>   ack("CWD");
> 



ftpd(8): add pledge(2)

2021-05-13 Thread Jan Klemkow
Hi,

This is the first attempt to bring pledge into ftpd.  The main ftpd
process can't use pledge for now because of possible chroot(2) calls.
But the two forks after user login are pledged with this diff.

I tested it manually and with the ftpd's regression tests.

OK?

bye,
Jan

Index: monitor.c
===
RCS file: /cvs/src/libexec/ftpd/monitor.c,v
retrieving revision 1.26
diff -u -p -r1.26 monitor.c
--- monitor.c   28 Jun 2019 13:32:53 -  1.26
+++ monitor.c   13 May 2021 14:31:35 -
@@ -295,11 +295,17 @@ handle_cmds(void)
sizeof(slavequit));
break;
case AUTH_SLAVE:
+   if (pledge("stdio rpath inet recvfd sendfd "
+   "wpath cpath proc tty getpw", NULL) == -1)
+   fatalx("pledge");
/* User-privileged slave */
debugmsg("user-privileged slave started");
return;
/* NOTREACHED */
case AUTH_MONITOR:
+   if (pledge("stdio inet sendfd recvfd proc",
+   NULL) == -1)
+   fatalx("pledge");
/* Post-auth monitor */
debugmsg("monitor went into post-auth phase");
state = POSTAUTH;



Re: services(5): add default ftps ports

2021-05-07 Thread Jan Klemkow
On Thu, May 06, 2021 at 11:09:03AM -0600, Theo de Raadt wrote:
> Jan Klemkow  wrote:
> 
> > > > > I'm working on a diff to bring ftps with libtls into our ftpd(8).  
> > > > > There
> > > > > is a "getaddrinfo(NULL, "ftps", &hints, &res0)" call, which uses this
> > > > > port.  Thus, I made this change.
> > > > 
> > > > Hang on -- does the world want ftps support?
> > 
> > I don't know what "the world" wants.  But I want ftps.  As far as I
> > can see, ftps is the only way to bring our ftpd(8) into the 21st
> > century.
> 
> I have a really hard time with that.
> 
> The protocol is completely broken, and in a way that adding TLS makes it
> even worse.

OK.  And what should we do with ftpd(8)?

I see just three ways:

 1. Prepare it for use on the modern internet with crypto support.
 2. Just use it for anonymous public file distribution.
 3. Remove the daemon.

In my opinion the protocol is not that bad and our daemon just needs some
refactoring and encryption support.



Re: services(5): add default ftps ports

2021-05-06 Thread Jan Klemkow
On Thu, May 06, 2021 at 06:36:52PM +0200, Mark Kettenis wrote:
> > From: "Theo de Raadt" 
> > Date: Thu, 06 May 2021 10:26:31 -0600
> > 
> > Jan Klemkow  wrote:
> > 
> > > On Wed, May 05, 2021 at 12:18:43PM -0600, Theo de Raadt wrote:
> > > > I would like a further justification for removing these ports from
> > > > the very limited dynamic reserved space used by bindresvport.
> > > > 
> > > > (but not by rresvport, which appears still stomp over them)
> > > > 
> > > > For tcp, 32 of the 512 are locked out.
> > > > For udp, 19.
> > > > 
> > > > What software is actually using these ports?
> > > > 
> > > > Is that software irrelevant these days?
> > > 
> > > I'm working on a diff to bring ftps with libtls into our ftpd(8).  There
> > > is a "getaddrinfo(NULL, "ftps", &hints, &res0)" call, which uses this
> > > port.  Thus, I made this change.
> > 
> > Hang on -- does the world want ftps support?

I don't know what "the world" wants.  But I want ftps.  As far as I
can see, ftps is the only way to bring our ftpd(8) into the 21st
century.

I use ftp in my private local setup.  I also want to use it over the public
internet in the future, like I did in the past.  That's why I'm working
on it.
 
> I was going to ask the same thing.  I mean even with encryption the
> FTP protocol still is a bad idea given all the problems with NAT
> traversal and such.

I don't use NAT or packet filters in my setup.  With IPv6 there is no
active FTP problem.



Re: services(5): add default ftps ports

2021-05-06 Thread Jan Klemkow
On Wed, May 05, 2021 at 12:18:43PM -0600, Theo de Raadt wrote:
> I would like a further justification for removing these ports from
> the very limited dynamic reserved space used by bindresvport.
> 
> (but not by rresvport, which appears still stomp over them)
> 
> For tcp, 32 of the 512 are locked out.
> For udp, 19.
> 
> What software is actually using these ports?
> 
> Is that software irrelevant these days?

I'm working on a diff to bring ftps with libtls into our ftpd(8).  There
is a "getaddrinfo(NULL, "ftps", &hints, &res0)" call, which uses this
port.  Thus, I made this change.

> Jan Klemkow  wrote:
> > On Wed, May 05, 2021 at 11:09:12AM +0100, Stuart Henderson wrote:
> > > On 2021/05/04 12:07, Jan Klemkow wrote:
> > > > Add missing ftps default ports to services(5).
> > > > 
> > > > Index: services
> > > > ===
> > > > RCS file: /cvs/src/etc/services,v
> > > > retrieving revision 1.99
> > > > diff -u -p -r1.99 services
> > > > --- services18 Feb 2021 02:30:29 -  1.99
> > > > +++ services4 May 2021 10:01:35 -
> > > > @@ -318,6 +318,10 @@ krb_prop   754/tcp hprop   # 
> > > > Kerberos slav
> > > >  krbupdate  760/tcp kreg# BSD Kerberos 
> > > > registration
> > > >  supfilesrv 871/tcp # SUP server
> > > >  swat   901/tcp # Samba Web 
> > > > Administration Tool
> > > > +ftps-data  989/tcp # ftp data over TLS/SSL
> > > > +ftps-data  989/udp # ftp data over TLS/SSL
> > > > +ftps   990/tcp # ftp control over 
> > > > TLS/SSL
> > > > +ftps   990/udp # ftp control over 
> > > > TLS/SSL
> > > 
> > > I'm OK with adding the TCP ones (though ftp-over-tls always makes me
> > > want to rant...). It's not going to run on UDP though so I think those
> > > should not be added.
> > 
> > OK?
> > 
> > Index: services
> > ===
> > RCS file: /cvs/src/etc/services,v
> > retrieving revision 1.99
> > diff -u -p -r1.99 services
> > --- services18 Feb 2021 02:30:29 -  1.99
> > +++ services5 May 2021 12:24:29 -
> > @@ -318,6 +318,8 @@ krb_prop754/tcp hprop   # 
> > Kerberos slav
> >  krbupdate  760/tcp kreg# BSD Kerberos registration
> >  supfilesrv 871/tcp # SUP server
> >  swat   901/tcp # Samba Web 
> > Administration Tool
> > +ftps-data  989/tcp # ftp data over TLS
> > +ftps   990/tcp # ftp control over TLS
> >  supfiledbg 1127/tcp# SUP debugging
> >  support1529/tcp# GNATS, cygnus bug 
> > tracker
> >  datametrics1645/udp
> > 
> 



ftpd(8): remove double fflush(3) calls

2021-05-05 Thread Jan Klemkow
Hi,

The function lreply() already calls fflush(3) on stdout.  So, these calls
are useless.

OK?

bye,
Jan

Index: ftpd.c
===
RCS file: /cvs/src/libexec/ftpd/ftpd.c,v
retrieving revision 1.229
diff -u -p -r1.229 ftpd.c
--- ftpd.c  15 Jan 2020 22:06:59 -  1.229
+++ ftpd.c  5 May 2021 14:39:25 -
@@ -568,7 +568,6 @@ main(int argc, char *argv[])
line[strcspn(line, "\n")] = '\0';
lreply(530, "%s", line);
}
-   (void) fflush(stdout);
(void) fclose(fp);
reply(530, "System not available.");
exit(0);
@@ -578,7 +577,6 @@ main(int argc, char *argv[])
line[strcspn(line, "\n")] = '\0';
lreply(220, "%s", line);
}
-   (void) fflush(stdout);
(void) fclose(fp);
/* reply(220,) must follow */
}
@@ -1078,7 +1076,6 @@ pass(char *passwd)
line[strcspn(line, "\n")] = '\0';
lreply(230, "%s", line);
}
-   (void) fflush(stdout);
(void) fclose(fp);
}
free(motd);
@@ -2029,7 +2026,6 @@ cwd(char *path)
line[strcspn(line, "\n")] = '\0';
lreply(250, "%s", line);
}
-   (void) fflush(stdout);
(void) fclose(message);
}
ack("CWD");



Re: services(5): add default ftps ports

2021-05-05 Thread Jan Klemkow
On Wed, May 05, 2021 at 11:09:12AM +0100, Stuart Henderson wrote:
> On 2021/05/04 12:07, Jan Klemkow wrote:
> > Add missing ftps default ports to services(5).
> > 
> > Index: services
> > ===
> > RCS file: /cvs/src/etc/services,v
> > retrieving revision 1.99
> > diff -u -p -r1.99 services
> > --- services18 Feb 2021 02:30:29 -  1.99
> > +++ services4 May 2021 10:01:35 -
> > @@ -318,6 +318,10 @@ krb_prop   754/tcp hprop   # 
> > Kerberos slav
> >  krbupdate  760/tcp kreg# BSD Kerberos registration
> >  supfilesrv 871/tcp # SUP server
> >  swat   901/tcp # Samba Web 
> > Administration Tool
> > +ftps-data  989/tcp # ftp data over TLS/SSL
> > +ftps-data  989/udp # ftp data over TLS/SSL
> > +ftps   990/tcp # ftp control over 
> > TLS/SSL
> > +ftps   990/udp # ftp control over 
> > TLS/SSL
> 
> I'm OK with adding the TCP ones (though ftp-over-tls always makes me
> want to rant...). It's not going to run on UDP though so I think those
> should not be added.

OK?

Index: services
===
RCS file: /cvs/src/etc/services,v
retrieving revision 1.99
diff -u -p -r1.99 services
--- services18 Feb 2021 02:30:29 -  1.99
+++ services5 May 2021 12:24:29 -
@@ -318,6 +318,8 @@ krb_prop754/tcp hprop   # Kerberos slav
 krbupdate  760/tcp kreg# BSD Kerberos registration
 supfilesrv 871/tcp # SUP server
 swat   901/tcp # Samba Web Administration Tool
+ftps-data  989/tcp # ftp data over TLS
+ftps   990/tcp # ftp control over TLS
 supfiledbg 1127/tcp# SUP debugging
 support1529/tcp# GNATS, cygnus bug 
tracker
 datametrics1645/udp



services(5): add default ftps ports

2021-05-04 Thread Jan Klemkow
Hi,

Add missing ftps default ports to services(5).

OK?

bye,
Jan

Index: services
===
RCS file: /cvs/src/etc/services,v
retrieving revision 1.99
diff -u -p -r1.99 services
--- services18 Feb 2021 02:30:29 -  1.99
+++ services4 May 2021 10:01:35 -
@@ -318,6 +318,10 @@ krb_prop   754/tcp hprop   # Kerberos slav
 krbupdate  760/tcp kreg# BSD Kerberos registration
 supfilesrv 871/tcp # SUP server
 swat   901/tcp # Samba Web Administration Tool
+ftps-data  989/tcp # ftp data over TLS/SSL
+ftps-data  989/udp # ftp data over TLS/SSL
+ftps   990/tcp # ftp control over TLS/SSL
+ftps   990/udp # ftp control over TLS/SSL
 supfiledbg 1127/tcp# SUP debugging
 support1529/tcp# GNATS, cygnus bug 
tracker
 datametrics1645/udp



ftpd(8): constify internal functions

2021-05-04 Thread Jan Klemkow
Hi,

The following diff adds some missing consts for char * to the internal
program functions.

OK?

bye,
Jan

Index: extern.h
===
RCS file: /cvs/src/libexec/ftpd/extern.h,v
retrieving revision 1.21
diff -u -p -r1.21 extern.h
--- extern.h15 Jan 2020 22:06:59 -  1.21
+++ extern.h4 May 2021 08:34:14 -
@@ -64,38 +64,38 @@
 void   blkfree(char **);
 char  **copyblk(char **);
 void   cwd(char *);
-void   delete(char *);
+void   delete(const char *);
 void   dologout(int);
-void   fatal(char *);
+void   fatal(const char *);
 intftpd_pclose(FILE *, pid_t);
 FILE   *ftpd_ls(const char *, pid_t *);
 int get_line(char *, int, FILE *);
-void   ftpdlogwtmp(char *, char *, char *);
+void   ftpdlogwtmp(const char *, const char *, const char *);
 void   lreply(int, const char *, ...);
-void   makedir(char *);
-void   nack(char *);
+void   makedir(const char *);
+void   nack(const char *);
 enum auth_ret
pass(char *);
 void   passive(void);
 intepsvproto2af(int);
 intaf2epsvproto(int);
-void   long_passive(char *, int);
+void   long_passive(const char *, int);
 intextended_port(const char *);
 void   epsv_protounsupp(const char *);
-void   perror_reply(int, char *);
+void   perror_reply(int, const char *);
 void   pwd(void);
-void   removedir(char *);
-void   renamecmd(char *, char *);
+void   removedir(const char *);
+void   renamecmd(const char *, const char *);
 char   *renamefrom(char *);
 void   reply(int, const char *, ...);
 void   reply_r(int, const char *, ...);
 enum ret_cmd { RET_FILE, RET_LIST };
-void   retrieve(enum ret_cmd, char *);
+void   retrieve(enum ret_cmd, const char *);
 void   send_file_list(char *);
 void   setproctitle(const char *, ...);
 void   statcmd(void);
-void   statfilecmd(char *);
-void   store(char *, char *, int);
+void   statfilecmd(const char *);
+void   store(const char *, const char *, int);
 void   upper(char *);
 void   user(char *);
 void   yyerror(char *);
Index: ftpcmd.y
===
RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
retrieving revision 1.69
diff -u -p -r1.69 ftpcmd.y
--- ftpcmd.y4 Mar 2020 20:17:48 -   1.69
+++ ftpcmd.y4 May 2021 08:34:14 -
@@ -1065,8 +1065,8 @@ struct tab sitetab[] = {
 
 static void help(struct tab *, char *);
 static struct tab *
-lookup(struct tab *, char *);
-static void sizecmd(char *);
+lookup(struct tab *, const char *);
+static void sizecmd(const char *);
 static int  yylex(void);
 
 extern int epsvall;
@@ -1074,7 +1074,7 @@ extern int epsvall;
 static struct tab *
 lookup(p, cmd)
struct tab *p;
-   char *cmd;
+   const char *cmd;
 {
 
for (; p->name != NULL; p++)
@@ -1508,7 +1506,7 @@ help(ctab, s)
 
 static void
 sizecmd(filename)
-   char *filename;
+   const char *filename;
 {
switch (type) {
case TYPE_L:
Index: ftpd.c
===
RCS file: /cvs/src/libexec/ftpd/ftpd.c,v
retrieving revision 1.229
diff -u -p -r1.229 ftpd.c
--- ftpd.c  15 Jan 2020 22:06:59 -  1.229
+++ ftpd.c  4 May 2021 08:34:14 -
@@ -191,28 +191,28 @@ char  proctitle[BUFSIZ];  /* initial part 
(long long)(cnt)); \
}
 
-static void ack(char *);
+static void ack(const char *);
 static void sigurg(int);
 static void myoob(void);
-static int  checkuser(char *, char *);
-static FILE*dataconn(char *, off_t, char *);
+static int  checkuser(char *, const char *);
+static FILE*dataconn(const char *, off_t, char *);
 static void dolog(struct sockaddr *);
 static char*copy_dir(char *, struct passwd *);
 static char*curdir(void);
 static void end_login(void);
 static FILE*getdatasock(char *);
-static int  guniquefd(char *, char **);
+static int  guniquefd(const char *, char **);
 static void lostconn(int);
 static void sigquit(int);
 static int  receive_data(FILE *, FILE *);
 static void replydirname(const char *, const char *);
 static int  send_data(FILE *, FILE *, off_t, off_t, int);
 static struct passwd *
-sgetpwnam(char *, struct passwd *);
+sgetpwnam(const char *, struct passwd *);
 static void reapchild(int);
 static void usage(void);
 
-voidlogxfer(char *, off_t, time_t);
+voidlogxfer(const char *, off_t, time_t);
 voidset_slave_signals(void);
 
 static char *
@@ -638,7 +638,7 @@ sigquit(int signo)
  * (e.g., globbing).
  */
 static struct passwd *
-sgetpwnam(char *name, struct passwd *pw)
+sgetpwnam(const char *name, struct passwd *pw)
 {
static struct passwd *save;
struct passwd *old;
@@ -819,7 +819,7 @@ user(char *name)
  * Check if a user is in the file "fname"
  */
 static int
-checkuser(char *fname, char *name)
+checkuser(char *fna

ftpd(8): remove useless islower(3) in upper()

2021-05-01 Thread Jan Klemkow
Hi,

This cleanup diff removes a useless islower(3) check from the loop.  It is
guaranteed by toupper(3) that no character will be changed if it is not a
lower-case one.

man toupper(3):
 The toupper() and toupper_l() functions convert a lower-case letter
 to the corresponding upper-case letter.  The _toupper() function is
 identical to toupper() except that c must be a lower-case letter.

POSIX:
 ...
 All other arguments in the domain are returned unchanged.
 ...

OK?

bye,
Jan

Index: ftpcmd.y
===
RCS file: /cvs/src/libexec/ftpd/ftpcmd.y,v
retrieving revision 1.69
diff -u -p -r1.69 ftpcmd.y
--- ftpcmd.y4 Mar 2020 20:17:48 -   1.69
+++ ftpcmd.y1 May 2021 09:09:46 -
@@ -1435,10 +1435,8 @@ upper(s)
 {
char *p;
 
-   for (p = s; *p; p++) {
-   if (islower((unsigned char)*p))
-   *p = (char)toupper((unsigned char)*p);
-   }
+   for (p = s; *p; p++)
+   *p = (char)toupper((unsigned char)*p);
 }
 
 static void



fyi: get HP EliteBook 830 G7/G8 booting

2021-03-26 Thread Jan Klemkow
Hi,

If you want to boot OpenBSD on an HP EliteBook 830 G7/G8, the bootloader
will hang while loading the kernel, because the UEFI loads the
bootloader at the same place in memory where the bootloader will copy
the kernel.  We are unable to load the kernel at an arbitrary address.
Thus, the following diff will help you get OpenBSD running on these
machines.  It moves the hardcoded kernel address to a free place.

I don't intend to commit this.  It's just a hint for people who are
running into the same issue as I did.

bye,
Jan

Index: arch/amd64/conf//ld.script
===
RCS file: /cvs/src/sys/arch/amd64/conf/ld.script,v
retrieving revision 1.17
diff -u -p -r1.17 ld.script
--- arch/amd64/conf//ld.script  7 Mar 2021 23:10:54 -   1.17
+++ arch/amd64/conf//ld.script  18 Mar 2021 21:11:18 -
@@ -38,8 +38,8 @@ PHDRS
  */
 __ALIGN_SIZE = 0x1000;
 __kernel_base = 0x8000;
-__kernel_virt_base = __kernel_base + 0x100;
-__kernel_phys_base = 0x100;
+__kernel_virt_base = __kernel_base + 0x103;
+__kernel_phys_base = 0x103;
 __kernel_virt_to_phys = __kernel_phys_base - __kernel_virt_base;
 
 ENTRY(start)



Re: vmm crash on 6.9-beta

2021-03-19 Thread Jan Klemkow
Hi,

I had the same issue a few days ago on server hardware of mine.  I just
ran 'cvs up'.  So, it looks like a generic bug in FFS and not related to
vmm.

OpenBSD 6.9-beta (GENERIC.MP) #396: Thu Mar 11 19:15:56 MST 2021
dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP

ciao,
Jan

ddb{2}> show panic
ffs_valloc: dup alloc

ddb{2}> trace
db_enter() at db_enter+0x10
panic(81dda170) at panic+0x12a
ffs_inode_alloc(fd8a1acb50f0,81a4,fd8c3f7ba120,8000229d3088) at ffs
_inode_alloc+0x442
ufs_makeinode(81a4,fd8a8a498940,8000229d3380,8000229d33d0) at ufs_m
akeinode+0x7f
ufs_create(8000229d3130) at ufs_create+0x3c
VOP_CREATE(fd8a8a498940,8000229d3380,8000229d33d0,8000229d3190)
 at VOP_CREATE+0x4a
vn_open(8000229d3350,602,1a4) at vn_open+0x182
doopenat(800022915500,ff9c,cc7a0280ad0,601,1b6,8000229d3550) at doo
penat+0x1cd
syscall(8000229d35c0) at syscall+0x389
Xsyscall() at Xsyscall+0x128
end of kernel
end trace frame: 0x7f7c5520, count: -10

ddb{2}> ps
   PID TID   PPIDUID  S   FLAGS  WAIT  COMMAND
*56226  366608  70629  0  70x13cvs



Re: LibreSSL: extend the max. no. of SANs for avoid OOM error

2021-03-17 Thread Jan Klemkow
ping

On Tue, Mar 09, 2021 at 03:49:32PM +0100, Jan Klemkow wrote:
> Hi,
> 
> The https://ugos.ugm.ac.id certificate contains 2032 subject alt names,
> which leads to the following verification error in LibreSSL.
> 
> # openssl s_client -connect ugos.ugm.ac.id:443
> ...
> verify error:num=17:out of memory
> ...
> 
> The following diff sets the maximum number of SANs to the next higher
> number to fix this issue.
> 
> OK?
> 
> bye,
> Jan
> 
> Index: lib/libcrypto/x509/x509_internal.h
> ===
> RCS file: /cvs/src/lib/libcrypto/x509/x509_internal.h,v
> retrieving revision 1.6
> diff -u -p -r1.6 x509_internal.h
> --- lib/libcrypto/x509/x509_internal.h5 Jan 2021 16:45:59 -   
> 1.6
> +++ lib/libcrypto/x509/x509_internal.h9 Mar 2021 14:38:35 -
> @@ -31,7 +31,7 @@
>   * Limit the number of names and constraints we will check in a chain
>   * to avoid a hostile input DOS
>   */
> -#define X509_VERIFY_MAX_CHAIN_NAMES  512
> +#define X509_VERIFY_MAX_CHAIN_NAMES  4096
>  #define X509_VERIFY_MAX_CHAIN_CONSTRAINTS512
>  
>  /*
> 



Re: ixl(4): add ID for X710 10G SFP+

2021-03-15 Thread Jan Klemkow
On Mon, Mar 15, 2021 at 01:35:28AM -0600, Theo de Raadt wrote:
> My comments are about the "text name", which goes into every kernel
> anyone compiles.
> 
> It should be as short as possible.

Sorry, I missed that point.

> But the reason why 10G is incorrect is because surely the port can
> accept 1G, or a variety of other SFPs...  It is simply too exact,
> and wasting kernel bytes.

OK?

Thanks,
Jan

Index: if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.73
diff -u -p -r1.73 if_ixl.c
--- if_ixl.c26 Feb 2021 10:36:45 -  1.73
+++ if_ixl.c15 Mar 2021 07:42:48 -
@@ -1611,6 +1611,7 @@ struct ixl_device {
 
 static const struct ixl_device ixl_devices[] = {
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP },
+   { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2 },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_40G_BP },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_BP, },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_QSFP_1 },
Index: pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1960
diff -u -p -r1.1960 pcidevs
--- pcidevs 14 Mar 2021 01:09:29 -  1.1960
+++ pcidevs 15 Mar 2021 07:42:19 -
@@ -3702,6 +3702,7 @@ product INTEL ICH8_IGP_AMT0x104a  ICH8 I
 product INTEL ICH8_IGP_C   0x104b  ICH8 IGP C
 product INTEL ICH8_IFE 0x104c  ICH8 IFE
 product INTEL ICH8_IGP_M   0x104d  ICH8 IGP M
+product INTEL X710_10G_SFP_2   0x104e  X710 SFP+
 product INTEL PRO_100_VE_4 0x1050  PRO/100 VE
 product INTEL PRO_100_VE_5 0x1051  PRO/100 VE
 product INTEL PRO_100_VM_6 0x1052  PRO/100 VM
Index: pcidevs.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v
retrieving revision 1.1954
diff -u -p -r1.1954 pcidevs.h
--- pcidevs.h   14 Mar 2021 01:10:35 -  1.1954
+++ pcidevs.h   15 Mar 2021 07:42:21 -
@@ -3707,6 +3707,7 @@
 #definePCI_PRODUCT_INTEL_ICH8_IGP_C0x104b  /* ICH8 IGP C */
 #definePCI_PRODUCT_INTEL_ICH8_IFE  0x104c  /* ICH8 IFE */
 #definePCI_PRODUCT_INTEL_ICH8_IGP_M0x104d  /* ICH8 IGP M */
+#definePCI_PRODUCT_INTEL_X710_10G_SFP_20x104e  /* X710 
SFP+ */
 #definePCI_PRODUCT_INTEL_PRO_100_VE_4  0x1050  /* PRO/100 VE */
 #definePCI_PRODUCT_INTEL_PRO_100_VE_5  0x1051  /* PRO/100 VE */
 #definePCI_PRODUCT_INTEL_PRO_100_VM_6  0x1052  /* PRO/100 VM */
Index: pcidevs_data.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v
retrieving revision 1.1949
diff -u -p -r1.1949 pcidevs_data.h
--- pcidevs_data.h  14 Mar 2021 01:10:35 -  1.1949
+++ pcidevs_data.h  15 Mar 2021 07:42:21 -
@@ -12252,6 +12252,10 @@ static const struct pci_known_product pc
"ICH8 IGP M",
},
{
+   PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2,
+   "X710 SFP+",
+   },
+   {
PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_PRO_100_VE_4,
"PRO/100 VE",
},



Re: ixl(4): add ID for X710 10G SFP+

2021-03-15 Thread Jan Klemkow
On Sun, Mar 14, 2021 at 12:39:42PM -0600, Theo de Raadt wrote:
> > +product INTEL X710_10G_SFP_2   0x104e  X710 10G SFP+
> 
> You only need:
> 
> X710 SFP+
> 
> Adding 10G is incorrect.

OK?

Thanks,
Jan

Index: if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.73
diff -u -p -r1.73 if_ixl.c
--- if_ixl.c26 Feb 2021 10:36:45 -  1.73
+++ if_ixl.c15 Mar 2021 07:17:14 -
@@ -1611,6 +1611,7 @@ struct ixl_device {
 
 static const struct ixl_device ixl_devices[] = {
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP },
+   { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_SFP },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_40G_BP },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_BP, },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_QSFP_1 },
Index: pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1960
diff -u -p -r1.1960 pcidevs
--- pcidevs 14 Mar 2021 01:09:29 -  1.1960
+++ pcidevs 15 Mar 2021 07:15:22 -
@@ -3702,6 +3702,7 @@ product INTEL ICH8_IGP_AMT0x104a  ICH8 I
 product INTEL ICH8_IGP_C   0x104b  ICH8 IGP C
 product INTEL ICH8_IFE 0x104c  ICH8 IFE
 product INTEL ICH8_IGP_M   0x104d  ICH8 IGP M
+product INTEL X710_SFP 0x104e  X710 SFP+
 product INTEL PRO_100_VE_4 0x1050  PRO/100 VE
 product INTEL PRO_100_VE_5 0x1051  PRO/100 VE
 product INTEL PRO_100_VM_6 0x1052  PRO/100 VM
Index: pcidevs.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v
retrieving revision 1.1954
diff -u -p -r1.1954 pcidevs.h
--- pcidevs.h   14 Mar 2021 01:10:35 -  1.1954
+++ pcidevs.h   15 Mar 2021 07:15:24 -
@@ -3707,6 +3707,7 @@
 #definePCI_PRODUCT_INTEL_ICH8_IGP_C0x104b  /* ICH8 IGP C */
 #definePCI_PRODUCT_INTEL_ICH8_IFE  0x104c  /* ICH8 IFE */
 #definePCI_PRODUCT_INTEL_ICH8_IGP_M0x104d  /* ICH8 IGP M */
+#definePCI_PRODUCT_INTEL_X710_SFP  0x104e  /* X710 SFP+ */
 #definePCI_PRODUCT_INTEL_PRO_100_VE_4  0x1050  /* PRO/100 VE */
 #definePCI_PRODUCT_INTEL_PRO_100_VE_5  0x1051  /* PRO/100 VE */
 #definePCI_PRODUCT_INTEL_PRO_100_VM_6  0x1052  /* PRO/100 VM */
Index: pcidevs_data.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v
retrieving revision 1.1949
diff -u -p -r1.1949 pcidevs_data.h
--- pcidevs_data.h  14 Mar 2021 01:10:35 -  1.1949
+++ pcidevs_data.h  15 Mar 2021 07:15:24 -
@@ -12252,6 +12252,10 @@ static const struct pci_known_product pc
"ICH8 IGP M",
},
{
+   PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_SFP,
+   "X710 SFP+",
+   },
+   {
PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_PRO_100_VE_4,
"PRO/100 VE",
},



ixl(4): add ID for X710 10G SFP+

2021-03-14 Thread Jan Klemkow
Hi,

This diff attaches the Intel x710 10G SFP+ NIC in ixl(4).

ixl2 at pci11 dev 0 function 2 "Intel X710 10G SFP+" rev 0x02: port 1, FW 
8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:bc
ixl3 at pci11 dev 0 function 3 "Intel X710 10G SFP+" rev 0x02: port 3, FW 
8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:bd

ixl2: flags=8802 mtu 1500
lladdr 3c:ec:ef:1f:c3:bc
index 3 priority 0 llprio 3
media: Ethernet autoselect
status: no carrier
ixl3: flags=8802 mtu 1500
lladdr 3c:ec:ef:1f:c3:bd
index 4 priority 0 llprio 3
media: Ethernet autoselect
status: no carrier

OK?

bye,
Jan

Index: pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.73
diff -u -p -r1.73 if_ixl.c
--- pci/if_ixl.c26 Feb 2021 10:36:45 -  1.73
+++ pci/if_ixl.c13 Mar 2021 23:56:13 -
@@ -1611,6 +1611,7 @@ struct ixl_device {
 
 static const struct ixl_device ixl_devices[] = {
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP },
+   { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2 },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_40G_BP },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_BP, },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XL710_QSFP_1 },
Index: pci/pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1960
diff -u -p -r1.1960 pcidevs
--- pci/pcidevs 14 Mar 2021 01:09:29 -  1.1960
+++ pci/pcidevs 14 Mar 2021 11:33:27 -
@@ -3702,6 +3702,7 @@ product INTEL ICH8_IGP_AMT0x104a  ICH8 I
 product INTEL ICH8_IGP_C   0x104b  ICH8 IGP C
 product INTEL ICH8_IFE 0x104c  ICH8 IFE
 product INTEL ICH8_IGP_M   0x104d  ICH8 IGP M
+product INTEL X710_10G_SFP_2   0x104e  X710 10G SFP+
 product INTEL PRO_100_VE_4 0x1050  PRO/100 VE
 product INTEL PRO_100_VE_5 0x1051  PRO/100 VE
 product INTEL PRO_100_VM_6 0x1052  PRO/100 VM
Index: pci/pcidevs.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v
retrieving revision 1.1954
diff -u -p -r1.1954 pcidevs.h
--- pci/pcidevs.h   14 Mar 2021 01:10:35 -  1.1954
+++ pci/pcidevs.h   14 Mar 2021 11:33:27 -
@@ -3707,6 +3707,7 @@
 #definePCI_PRODUCT_INTEL_ICH8_IGP_C0x104b  /* ICH8 IGP C */
 #definePCI_PRODUCT_INTEL_ICH8_IFE  0x104c  /* ICH8 IFE */
 #definePCI_PRODUCT_INTEL_ICH8_IGP_M0x104d  /* ICH8 IGP M */
+#definePCI_PRODUCT_INTEL_X710_10G_SFP_20x104e  /* X710 
10G SFP+ */
 #definePCI_PRODUCT_INTEL_PRO_100_VE_4  0x1050  /* PRO/100 VE */
 #definePCI_PRODUCT_INTEL_PRO_100_VE_5  0x1051  /* PRO/100 VE */
 #definePCI_PRODUCT_INTEL_PRO_100_VM_6  0x1052  /* PRO/100 VM */
Index: pci/pcidevs_data.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v
retrieving revision 1.1949
diff -u -p -r1.1949 pcidevs_data.h
--- pci/pcidevs_data.h  14 Mar 2021 01:10:35 -  1.1949
+++ pci/pcidevs_data.h  14 Mar 2021 11:33:27 -
@@ -12252,6 +12252,10 @@ static const struct pci_known_product pc
"ICH8 IGP M",
},
{
+   PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_SFP_2,
+   "X710 10G SFP+",
+   },
+   {
PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_PRO_100_VE_4,
"PRO/100 VE",
},



Re: add missing PCI ID for Intel NVMe

2021-03-13 Thread Jan Klemkow
On Fri, Mar 12, 2021 at 11:56:00AM +0100, Mark Kettenis wrote:
> I believe this is what ark.intel.com calls a "Intel SSD DC P4510
> Series" part.  Is that correct?

Yes, that is correct.

On Fri, Mar 12, 2021 at 10:00:54PM +1100, Jonathan Gray wrote:
> On Fri, Mar 12, 2021 at 11:30:04AM +0100, Jan Klemkow wrote:
> So it is a 'SSD DC P4510'
> 
> A driver downloaded from Intel has
> ...
> PCI\VEN_8086&DEV_0A54.DeviceDesc = "Intel(R) SSD DC 
> P4500/4600/4501/4601/4608/4510/4610/4511 Series"
> ...
> 
> perhaps just
> product INTEL NVME_5  0x0a54  SSD DC

You are right, that's a better name.  Also the sticker on the disk just
says "Intel SSD DC".

OK?

Thanks,
Jan

Index: pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1959
diff -u -p -r1.1959 pcidevs
--- pcidevs 27 Feb 2021 03:00:54 -  1.1959
+++ pcidevs 13 Mar 2021 20:22:04 -
@@ -3465,6 +3465,7 @@ product INTEL CORE4G_M_ULT_GT30x0a26  HD
 product INTEL CORE4G_S_ULT_GT3 0x0a2a  HD Graphics
 product INTEL CORE4G_R_ULT_GT3_1 0x0a2bHD Graphics
 product INTEL CORE4G_R_ULT_GT3_2 0x0a2eIris Graphics 5100
+product INTEL NVME_5   0x0a54  SSD DC
 product INTEL GMA3600_00x0be0  GMA 3600
 product INTEL D2000_IGD0x0be1  Atom D2000/N2000 Video
 product INTEL GMA3600_20x0be2  GMA 3600
Index: pcidevs.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v
retrieving revision 1.1953
diff -u -p -r1.1953 pcidevs.h
--- pcidevs.h   27 Feb 2021 03:01:25 -  1.1953
+++ pcidevs.h   13 Mar 2021 20:22:06 -
@@ -3470,6 +3470,7 @@
 #definePCI_PRODUCT_INTEL_CORE4G_S_ULT_GT3  0x0a2a  /* HD 
Graphics */
 #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_10x0a2b  /* HD 
Graphics */
 #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_20x0a2e  /* Iris 
Graphics 5100 */
+#definePCI_PRODUCT_INTEL_NVME_50x0a54  /* SSD DC */
 #definePCI_PRODUCT_INTEL_GMA3600_0 0x0be0  /* GMA 3600 */
 #definePCI_PRODUCT_INTEL_D2000_IGD 0x0be1  /* Atom 
D2000/N2000 Video */
 #definePCI_PRODUCT_INTEL_GMA3600_2 0x0be2  /* GMA 3600 */
Index: pcidevs_data.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v
retrieving revision 1.1948
diff -u -p -r1.1948 pcidevs_data.h
--- pcidevs_data.h  27 Feb 2021 03:01:25 -  1.1948
+++ pcidevs_data.h  13 Mar 2021 20:22:06 -
@@ -11304,6 +11304,10 @@ static const struct pci_known_product pc
"Iris Graphics 5100",
},
{
+   PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_NVME_5,
+   "SSD DC",
+   },
+   {
PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_GMA3600_0,
"GMA 3600",
},



add missing PCI ID for Intel NVMe

2021-03-12 Thread Jan Klemkow
Hi,

This diff adds a missing PCI ID for an Intel NVMe disk.  The disk works
after my last fix [1].

OK?

bye,
Jan

[1]: https://marc.info/?l=openbsd-tech&m=161418460303831

Index: pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1959
diff -u -p -r1.1959 pcidevs
--- pcidevs 27 Feb 2021 03:00:54 -  1.1959
+++ pcidevs 12 Mar 2021 10:16:44 -
@@ -3465,6 +3465,7 @@ product INTEL CORE4G_M_ULT_GT30x0a26  HD
 product INTEL CORE4G_S_ULT_GT3 0x0a2a  HD Graphics
 product INTEL CORE4G_R_ULT_GT3_1 0x0a2bHD Graphics
 product INTEL CORE4G_R_ULT_GT3_2 0x0a2eIris Graphics 5100
+product INTEL NVME_1   0x0a54  NVMe Datacenter SSD
 product INTEL GMA3600_00x0be0  GMA 3600
 product INTEL D2000_IGD0x0be1  Atom D2000/N2000 Video
 product INTEL GMA3600_20x0be2  GMA 3600
Index: pcidevs.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v
retrieving revision 1.1953
diff -u -p -r1.1953 pcidevs.h
--- pcidevs.h   27 Feb 2021 03:01:25 -  1.1953
+++ pcidevs.h   12 Mar 2021 10:16:46 -
@@ -3470,6 +3470,7 @@
 #definePCI_PRODUCT_INTEL_CORE4G_S_ULT_GT3  0x0a2a  /* HD 
Graphics */
 #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_10x0a2b  /* HD 
Graphics */
 #definePCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_20x0a2e  /* Iris 
Graphics 5100 */
+#definePCI_PRODUCT_INTEL_NVME_10x0a54  /* NVMe 
Datacenter SSD */
 #definePCI_PRODUCT_INTEL_GMA3600_0 0x0be0  /* GMA 3600 */
 #definePCI_PRODUCT_INTEL_D2000_IGD 0x0be1  /* Atom 
D2000/N2000 Video */
 #definePCI_PRODUCT_INTEL_GMA3600_2 0x0be2  /* GMA 3600 */
Index: pcidevs_data.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v
retrieving revision 1.1948
diff -u -p -r1.1948 pcidevs_data.h
--- pcidevs_data.h  27 Feb 2021 03:01:25 -  1.1948
+++ pcidevs_data.h  12 Mar 2021 10:16:46 -
@@ -11304,6 +11304,10 @@ static const struct pci_known_product pc
"Iris Graphics 5100",
},
{
+   PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_NVME_1,
+   "NVMe Datacenter SSD",
+   },
+   {
PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_GMA3600_0,
"GMA 3600",
},



LibreSSL: extend the max. no. of SANs to avoid OOM error

2021-03-09 Thread Jan Klemkow
Hi,

The https://ugos.ugm.ac.id certificate contains 2032 subject alt names,
which leads to the following verification error in LibreSSL.

# openssl s_client -connect ugos.ugm.ac.id:443
...
verify error:num=17:out of memory
...

The following diff sets the maximum number of SANs to the next higher
number to fix this issue.

OK?

bye,
Jan

Index: lib/libcrypto/x509/x509_internal.h
===
RCS file: /cvs/src/lib/libcrypto/x509/x509_internal.h,v
retrieving revision 1.6
diff -u -p -r1.6 x509_internal.h
--- lib/libcrypto/x509/x509_internal.h  5 Jan 2021 16:45:59 -   1.6
+++ lib/libcrypto/x509/x509_internal.h  9 Mar 2021 14:38:35 -
@@ -31,7 +31,7 @@
  * Limit the number of names and constraints we will check in a chain
  * to avoid a hostile input DOS
  */
-#define X509_VERIFY_MAX_CHAIN_NAMES512
+#define X509_VERIFY_MAX_CHAIN_NAMES4096
 #define X509_VERIFY_MAX_CHAIN_CONSTRAINTS  512
 
 /*



Re: pcidump(8): add missing PCI classes

2021-03-05 Thread Jan Klemkow
On Fri, Mar 05, 2021 at 09:22:28AM -0700, Theo de Raadt wrote:
> Fix dump() to convert subclass == NULL into something else, or maybe the
> fix should be in pci_subclass() itself.

My initial mistake was to use zero in an empty list.  This leads to one
element in the list, which causes wrong list handling in the followup
code path.

So, the following diff removes the zero from the list.  It also adds a
check for ps->name being NULL, to prevent dump() from printing (null),
and removes a useless blank line while here.

OK?

Thanks,
Jan

Index: pcidump.c
===
RCS file: /cvs/src/usr.sbin/pcidump/pcidump.c,v
retrieving revision 1.62
diff -u -p -r1.62 pcidump.c
--- pcidump.c   5 Mar 2021 12:57:20 -   1.62
+++ pcidump.c   5 Mar 2021 17:05:40 -
@@ -1296,8 +1296,8 @@ static const struct pci_subclass pci_sub
{ PCI_SUBCLASS_DASP_MISC,   "Miscellaneous" },
 };
 
-static const struct pci_subclass pci_subclass_accelerator[] = {0};
-static const struct pci_subclass pci_subclass_instrumentation[] = {0};
+static const struct pci_subclass pci_subclass_accelerator[] = {};
+static const struct pci_subclass pci_subclass_instrumentation[] = {};
 
 #define CLASS(_c, _n, _s) { \
.class = _c, \
@@ -1389,7 +1389,6 @@ pci_class_name(pci_class_t class)
return (pc->name);
 }
 
-
 static const char *
 pci_subclass_name(pci_class_t class, pci_subclass_t subclass)
 {
@@ -1401,7 +1400,7 @@ pci_subclass_name(pci_class_t class, pci
return ("(unknown)");
 
ps = pci_subclass(pc, subclass);
-   if (ps == NULL)
+   if (ps == NULL || ps->name == NULL)
return ("(unknown)");
 
return (ps->name);



Re: pcidump(8): add missing PCI classes

2021-03-05 Thread Jan Klemkow
On Fri, Mar 05, 2021 at 04:13:53PM +0100, Mark Kettenis wrote:
> > Date: Fri, 5 Mar 2021 12:05:38 +0100
> > From: Jan Klemkow 
> > Content-Type: text/plain; charset=us-ascii
> > Content-Disposition: inline
> > 
> > Hi,
> > 
> > this diff adds the missing PCI classes Accelerator and Instrumentation.
> > Thus, we can replace a few unknown in its output:
> > 
> > -   0x0008: Class: 13 (unknown), Subclass: 00 (unknown),
> > +   0x0008: Class: 13 Instrumentation, Subclass: 00 (null),
> 
> Is this "(null)" the result of printing a null pointer?  That would be
> not so good.

What do you suggest to use instead?  Empty String, or "unknown"?  It is
vendor specific.

Thanks,
Jan



pcidump(8): add missing PCI classes

2021-03-05 Thread Jan Klemkow
Hi,

this diff adds the missing PCI classes Accelerator and Instrumentation.
Thus, we can replace a few unknown in its output:

-   0x0008: Class: 13 (unknown), Subclass: 00 (unknown),
+   0x0008: Class: 13 Instrumentation, Subclass: 00 (null),

Both Classes have vendor specific APIs.  So, there are no predefined
subclasses.

OK?

bye,
Jan

Index: pcidump.c
===
RCS file: /cvs/src/usr.sbin/pcidump/pcidump.c,v
retrieving revision 1.61
diff -u -p -r1.61 pcidump.c
--- pcidump.c   17 Jan 2021 11:54:15 -  1.61
+++ pcidump.c   5 Mar 2021 10:57:27 -
@@ -1296,6 +1296,9 @@ static const struct pci_subclass pci_sub
{ PCI_SUBCLASS_DASP_MISC,   "Miscellaneous" },
 };
 
+static const struct pci_subclass pci_subclass_accelerator[] = {0};
+static const struct pci_subclass pci_subclass_instrumentation[] = {0};
+
 #define CLASS(_c, _n, _s) { \
.class = _c, \
.name = _n, \
@@ -1338,6 +1341,10 @@ static const struct pci_class pci_classe
pci_subclass_crypto),
CLASS(PCI_CLASS_DASP,   "DASP",
pci_subclass_dasp),
+   CLASS(PCI_CLASS_ACCELERATOR,"Accelerator",
+   pci_subclass_accelerator),
+   CLASS(PCI_CLASS_INSTRUMENTATION, "Instrumentation",
+   pci_subclass_instrumentation),
 };
 
 static const struct pci_class *



ixl(4): add missing pci dev id for X710 10GBase-T

2021-02-26 Thread Jan Klemkow
Hi,

The diff below adds a missing PCI device ID for an X710 10GBase NIC into
the ixl(4) driver.  The interfaces attach and run properly with this
diff.

ixl0 at pci11 dev 0 function 0 "Intel X710 10GBaseT" rev 0x02: port 0, FW 
8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:ba
ixl1 at pci11 dev 0 function 1 "Intel X710 10GBaseT" rev 0x02: port 2, FW 
8.1.63299 API 1.12, msix, 8 queues, address 3c:ec:ef:1f:c3:bb

# ifconfig ixl
ixl0: flags=8843 mtu 1500
lladdr 3c:ec:ef:1f:c3:ba
index 1 priority 0 llprio 3
media: Ethernet autoselect (1000baseT full-duplex)
status: active
inet 192.168.123.1 netmask 0xff00 broadcast 192.168.123.255
ixl1: flags=8843 mtu 1500
lladdr 3c:ec:ef:1f:c3:bb
index 2 priority 0 llprio 3
media: Ethernet autoselect (1000baseT full-duplex)
status: active
inet 192.168.124.1 netmask 0xff00 broadcast 192.168.124.255

OK?

bye,
Jan

Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.72
diff -u -p -r1.72 if_ixl.c
--- dev/pci/if_ixl.c25 Jan 2021 11:11:22 -  1.72
+++ dev/pci/if_ixl.c26 Feb 2021 09:51:56 -
@@ -1622,6 +1622,7 @@ static const struct ixl_device ixl_devic
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_T4_10G },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XXV710_25G_BP },
{ &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_XXV710_25G_SFP28, },
+   { &ixl_710, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_T, },
{ &ixl_722, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X722_10G_KX },
{ &ixl_722, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X722_10G_QSFP },
{ &ixl_722, PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X722_10G_SFP_1 },
Index: dev/pci/pcidevs
===
RCS file: /cvs/src/sys/dev/pci/pcidevs,v
retrieving revision 1.1956
diff -u -p -r1.1956 pcidevs
--- dev/pci/pcidevs 22 Feb 2021 01:17:23 -  1.1956
+++ dev/pci/pcidevs 26 Feb 2021 09:49:01 -
@@ -3962,6 +3962,7 @@ product INTEL I219_V140x15fa  I219-V
 product INTEL I219_LM130x15fb  I219-LM
 product INTEL I219_V13 0x15fc  I219-V
 product INTEL I225_BLANK_NVM   0x15fd  I225
+product INTEL X710_10G_T   0x15ff  X710 10GBaseT
 product INTEL CORE5G_H_PCIE_X160x1601  Core 5G PCIE
 product INTEL CORE5G_M_GT1_1   0x1602  HD Graphics
 product INTEL CORE5G_THERM 0x1603  Core 5G Thermal
Index: dev/pci/pcidevs.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs.h,v
retrieving revision 1.1949
diff -u -p -r1.1949 pcidevs.h
--- dev/pci/pcidevs.h   22 Feb 2021 01:18:01 -  1.1949
+++ dev/pci/pcidevs.h   26 Feb 2021 09:49:05 -
@@ -3967,6 +3967,7 @@
 #definePCI_PRODUCT_INTEL_I219_LM13 0x15fb  /* I219-LM */
 #definePCI_PRODUCT_INTEL_I219_V13  0x15fc  /* I219-V */
 #definePCI_PRODUCT_INTEL_I225_BLANK_NVM0x15fd  /* I225 
*/
+#definePCI_PRODUCT_INTEL_X710_10G_T0x15ff  /* X710 
10GBaseT */
 #definePCI_PRODUCT_INTEL_CORE5G_H_PCIE_X16 0x1601  /* Core 
5G PCIE */
 #definePCI_PRODUCT_INTEL_CORE5G_M_GT1_10x1602  /* HD 
Graphics */
 #definePCI_PRODUCT_INTEL_CORE5G_THERM  0x1603  /* Core 5G 
Thermal */
Index: dev/pci/pcidevs_data.h
===
RCS file: /cvs/src/sys/dev/pci/pcidevs_data.h,v
retrieving revision 1.1944
diff -u -p -r1.1944 pcidevs_data.h
--- dev/pci/pcidevs_data.h  22 Feb 2021 01:18:01 -  1.1944
+++ dev/pci/pcidevs_data.h  26 Feb 2021 09:49:05 -
@@ -13292,6 +13292,10 @@ static const struct pci_known_product pc
"I225",
},
{
+   PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_X710_10G_T,
+   "X710 10GBaseT",
+   },
+   {
PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_CORE5G_H_PCIE_X16,
"Core 5G PCIE",
},



Re: LibreSSL legacy verifier regression

2021-02-25 Thread Jan Klemkow
On Wed, Feb 24, 2021 at 09:21:56PM +0100, Theo Buehler wrote:
> On Wed, Feb 24, 2021 at 09:00:05PM +0100, Theo Buehler wrote:
> > On Wed, Feb 24, 2021 at 06:47:00AM +0100, Jan Klemkow wrote:
> > > another co-worker of mine has found an other regress in the LibreSSL
> > > legacy verifier.  I took his diff and made a test for our regression
> > > framework.
> > > 
> > > The legacy verifier seems not to check the certificate if no root CA was
> > > given.  The following test creates an expired certificate and tries to
> > > verify it.  In one case it found the expected error in another, it does
> > > not.
> > 
> > Thanks for the report and the test case, that's very helpful. The diff
> > at the end addresses this.
> > 
> > The verifier does not find the expected error because it now bails out
> > earlier.  This is a consequence of a refactoring of X509_verify_cert()
> > (x509_vfy.c r1.75) that was done to integrate the new verifier.
> > 
> > https://cvsweb.openbsd.org/cgi-bin/cvsweb/src/lib/libcrypto/x509/x509_vfy.c.diff?r1=1.74&r2=1.75
> > 
> > What happens is that x509_legacy_verify_build_chain() returns ok == 0 in
> > your test case. The safety net at the end of x509_verify_cert_legacy()
> > sets ctx->error to X509_V_ERR_UNSPECIFIED (so the unchecked call to
> > X509_verify_cert() in your regress test actually indicates verification
> > failure).
> > 
> > 
> > The diff below restores the previous behavior and fixes a bug.
> > 
> > Prior to the the refactoring, each 'goto end' in the code that is now in
> > x509_legacy_verify_build_chain() would stop validation, while in other
> > cases validation would have carried on. So indicate this via the return
> > value and return ok via a pointer.
> > 
> > The bug is that the return value check of x509_legacy_verify_build_chain()
> > should have been if (ok <= 0), not if (!ok).
> > 
> > Regarding your regress diff: I don't think we want to land it as it is.

Ok.

> > The verifier lives in libcrypto/x509, so the regress test belongs in
> > there.

You are right, it's the better place.  At least I wanted to send you a
bug report with concrete code to test.

> > We really need to come up with an extensible design that can check a
> > number of such cases (and ideally includes the bulk of your openssl/x509
> > regress tests). I don't want to add a directory for every bug in the
> > verifier or legacy verifier. As jsing already mentioned, I expect that
> > we want to commit the test certs so we don't need perl modules from
> > ports to run the regress. Then we want to add generating scripts and a
> > README that gives instructions on how to regenerate the certs if needed.
> > 
> > I would like to have one C program that runs all tests in a loop (or
> > perhaps one C program per family of regressions). It would be nice if
> > this C program could also be compiled against OpenSSL 1.1.1 so we can
> > easily check for differences of behavior (see x509/bettertls/Makefile
> > for an example that does this).  For your test program this just means:
> > don't access csc->blah, but use X509_STORE_CTX_get_blah(csc) instead.
> > 
> > Why does the test set TRUSTED_FIRST?

I just forgot to remove this line from the original version.

> > The code also needs a bit of cleaning. There are a number of unchecked
> > return values, for example strdup and sk_*_push, and csc is leaked
> > after X509_verify_cert().
> > 
> > It would also be nice to run this test against the new verifier.

The test passes with the new verifier in current, but not in 6.8.

> Missed an obvious simplification.

The diff looks fine to me and it fixes our regressions.
I would give you an OK jan, fwiw.

Thanks,
Jan

> Index: x509/x509_vfy.c
> ===
> RCS file: /cvs/src/lib/libcrypto/x509/x509_vfy.c,v
> retrieving revision 1.85
> diff -u -p -r1.85 x509_vfy.c
> --- x509/x509_vfy.c   11 Feb 2021 04:56:43 -  1.85
> +++ x509/x509_vfy.c   24 Feb 2021 20:19:34 -
> @@ -240,12 +240,13 @@ x509_vfy_check_id(X509_STORE_CTX *ctx) {
>   * Oooh..
>   */
>  static int
> -X509_verify_cert_legacy_build_chain(X509_STORE_CTX *ctx, int *bad)
> +X509_verify_cert_legacy_build_chain(X509_STORE_CTX *ctx, int *bad, int 
> *out_ok)
>  {
>   X509 *x, *xtmp, *xtmp2, *chain_ss = NULL;
>   int bad_chain = 0;
>   X509_VERIFY_PARAM *param = ctx->param;
> - int depth, i, ok = 0;
> + int ok = 0, ret = 0;
> + int depth, i;
>   int 

fix nvme(4): NULL deref. and empty device attachments

2021-02-24 Thread Jan Klemkow
Hi,

While attaching the following disks, the nvme driver runs into a NULL
dereference in nvme_scsi_capacity16() and nvme_scsi_capacity().

nvme0 at pci1 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: 
msix, NVMe 1.2
nvme0: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ0413002P4P0DGN
scsibus1 at nvme0: 129 targets, initiator 0
sd0 at scsibus1 targ 1 lun 0: 
sd0: 3815447MB, 512 bytes/sector, 7814037168 sectors
sd1 at scsibus1 targ 2 lun 0: 
uvm_fault(0x821d00e8, 0x0, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at  nvme_scsi_capacity16+0x39:  movq0(%rax),%rcx
ddb{0}>

"ns" in both functions will be NULL if "identify" is not allocated in
nvme_scsi_probe().  Thus, it's better to just not attach these empty
disks/LUNs.

nvme0 at pci1 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: 
msix, NVMe 1.2
nvme0: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ0413002P4P0DGN
scsibus1 at nvme0: 129 targets, initiator 0
sd0 at scsibus1 targ 1 lun 0: 
sd0: 3815447MB, 512 bytes/sector, 7814037168 sectors
ppb1 at pci0 dev 3 function 2 "AMD 17h PCIE" rev 0x00: msi
pci2 at ppb1 bus 98
nvme1 at pci2 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: 
msix, NVMe 1.2
nvme1: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ041500C34P0DGN
scsibus2 at nvme1: 129 targets, initiator 0
sd1 at scsibus2 targ 1 lun 0: 
sd1: 3815447MB, 512 bytes/sector, 7814037168 sectors
ppb2 at pci0 dev 3 function 3 "AMD 17h PCIE" rev 0x00: msi
pci3 at ppb2 bus 99
nvme2 at pci3 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: 
msix, NVMe 1.2
nvme2: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ041402Z64P0DGN
scsibus3 at nvme2: 129 targets, initiator 0
sd2 at scsibus3 targ 1 lun 0: 
sd2: 3815447MB, 512 bytes/sector, 7814037168 sectors
ppb3 at pci0 dev 3 function 4 "AMD 17h PCIE" rev 0x00: msi
pci4 at ppb3 bus 100
nvme3 at pci4 dev 0 function 0 vendor "Intel", unknown product 0x0a54 rev 0x00: 
msix, NVMe 1.2
nvme3: INTEL SSDPE2KX040T8, firmware VDV10170, serial PHLJ041403134P0DGN
scsibus4 at nvme3: 129 targets, initiator 0
sd3 at scsibus4 targ 1 lun 0: 
sd3: 3815447MB, 512 bytes/sector, 7814037168 sectors

The following diff signals an error to the upper probing function in
the SCSI layer to prevent further function calls in nvme(4), which would
just lead to the error described above and hundreds of unconfigured
devices.

OK?

bye,
Jan

Index: dev/ic/nvme.c
===
RCS file: /cvs//src/sys/dev/ic/nvme.c,v
retrieving revision 1.90
diff -u -p -r1.90 nvme.c
--- dev/ic/nvme.c   9 Feb 2021 01:50:10 -   1.90
+++ dev/ic/nvme.c   24 Feb 2021 16:01:48 -
@@ -463,11 +463,16 @@ nvme_scsi_probe(struct scsi_link *link)
scsi_io_put(&sc->sc_iopool, ccb);
 
identify = NVME_DMA_KVA(mem);
-   if (rv == 0 && lemtoh64(&identify->nsze) > 0) {
-   /* Commit namespace if it has a size greater than zero. */
-   identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK);
-   memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));
-   sc->sc_namespaces[link->target].ident = identify;
+   if (rv == 0) {
+   if (lemtoh64(&identify->nsze) > 0) {
+   /* Commit namespace if it has a size greater than zero. 
*/
+   identify = malloc(sizeof(*identify), M_DEVBUF, 
M_WAITOK);
+   memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));
+   sc->sc_namespaces[link->target].ident = identify;
+   } else {
+   /* Don't attach a namespace if its size is zero. */
+   rv = ENXIO;
+   }
}
 
nvme_dmamem_free(sc, mem);



LibreSSL legacy verifier regression

2021-02-24 Thread Jan Klemkow
Hi,

another co-worker of mine has found another regression in the LibreSSL
legacy verifier.  I took his diff and made a test for our regression
framework.

The legacy verifier seems not to check the certificate if no root CA was
given.  The following test creates an expired certificate and tries to
verify it.  In one case it finds the expected error; in the other, it
does not.

OK?

bye,
Jan

Index: lib/libcrypto/Makefile
===
RCS file: /cvs/src/regress/lib/libcrypto/Makefile,v
retrieving revision 1.41
diff -u -p -r1.41 Makefile
--- lib/libcrypto/Makefile  26 Dec 2020 00:48:56 -  1.41
+++ lib/libcrypto/Makefile  24 Feb 2021 05:29:51 -
@@ -23,6 +23,7 @@ SUBDIR += ecdsa
 SUBDIR += engine
 SUBDIR += evp
 SUBDIR += exp
+SUBDIR += expcert
 SUBDIR += free
 SUBDIR += gcm128
 SUBDIR += gost
Index: lib/libcrypto/expcert/Makefile
===
RCS file: lib/libcrypto/expcert/Makefile
diff -N lib/libcrypto/expcert/Makefile
--- /dev/null   1 Jan 1970 00:00:00 -
+++ lib/libcrypto/expcert/Makefile  24 Feb 2021 05:39:38 -
@@ -0,0 +1,29 @@
+# $OpenBSD$
+
+LDFLAGS += -lcrypto
+
+PROG = expcrt
+
+PKG_REQUIRE != pkg_info -e 'p5-IO-Socket-SSL-*'
+.if empty (PKG_REQUIRE)
+regress:
+   @echo "missing package p5-IO-Socket-SSL"
+   @echo SKIPPED
+.endif
+
+REGRESS_TARGETS =  test-chain-with-root-CA
+REGRESS_TARGETS += test-chain-without-root-CA
+REGRESS_SETUP_ONCE =   create-certs
+
+REGRESS_EXPECTED_FAILURES = test-chain-without-root-CA
+
+create-certs: create-certs.pl ${PROG}
+   perl ${.CURDIR}/create-certs.pl
+
+test-chain-with-root-CA:
+   ./expcrt -e 10 -r
+
+test-chain-without-root-CA:
+   ./expcrt -e 10
+
+.include 
Index: lib/libcrypto/expcert/create-certs.pl
===
RCS file: lib/libcrypto/expcert/create-certs.pl
diff -N lib/libcrypto/expcert/create-certs.pl
--- /dev/null   1 Jan 1970 00:00:00 -
+++ lib/libcrypto/expcert/create-certs.pl   24 Feb 2021 05:27:46 -
@@ -0,0 +1,46 @@
+#!/usr/bin/perl
+
+# Copyright (c) 2021 Anton Borowka 
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+use strict;
+use warnings;
+
+use IO::Socket::SSL::Utils;
+
+my %certs;
+
+@{$certs{root}}{qw/cert key/} = CERT_create(
+CA => 1,
+not_after => time() + 31536,
+subject => { commonName => 'Root CA' },
+);
+
+@{$certs{intermediate}}{qw/cert key/} = CERT_create(
+CA => 1,
+issuer => [@{$certs{root}}{qw/cert key/}],
+not_after => time() + 31536,
+subject => { commonName => 'Intermediate CA' },
+);
+
+@{$certs{expired}}{qw/cert key/} = CERT_create(
+issuer => [@{$certs{intermediate}}{qw/cert key/}],
+not_before => time() - 7200,
+not_after => time() - 3600,
+subject => { commonName => 'Expired' },
+);
+
+for (sort keys %certs) {
+PEM_cert2file($certs{$_}{cert}, "$_.crt");
+}
Index: lib/libcrypto/expcert/expcrt.c
===
RCS file: lib/libcrypto/expcert/expcrt.c
diff -N lib/libcrypto/expcert/expcrt.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ lib/libcrypto/expcert/expcrt.c  24 Feb 2021 05:27:46 -
@@ -0,0 +1,218 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2021 Jan Klemkow 
+ * Copyright (c) 2021 Anton Borowka 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#includ

Re: LibreSSL regressions

2021-02-16 Thread Jan Klemkow
On Tue, Feb 16, 2021 at 04:36:59AM +1100, Joel Sing wrote:
> On 21-02-15 14:49:46, Jan Klemkow wrote:
> > On Sat, Feb 13, 2021 at 03:53:48PM +0100, Theo Buehler wrote:
> > > On Sat, Feb 13, 2021 at 11:58:04AM +0100, Jan Klemkow wrote:
> > > > A coworker of mine has made tests with LibreSSL [1] and found some
> > > > regressions.  I took his test descriptions and created the following
> > > > automated regression test.  In the repository he described his findings
> > > > in detail.  I kept the numbers of the files and subtests in the target
> > > > names for now.  So, its easier to match it with his files.
> > > > 
> > > > I don't know how to handle the result of "test-01-ssl".  Thats why its
> > > > just a comment.  Someone may have an idea to handle this properly.
> > > > 
> > > > Any comments, wishes or OK's?
> > > > 
> > > > [1]: https://github.com/noxxi/libressl-tests
> > > 
> > > First of all thanks for the effort!
> > > 
> > > The perl script and probably also the Makefile should have a license.
> > > 
> > > Please add a check that tests whether the required perl modules are
> > > installed (p5-IO-Socket-SSL and p5-Net-SSLeay) and otherwise prints
> > > SKIPPED and their names, so I can install them if they're not present.
> > > I never remember their exact capitalization and hyphenation...
> > > 
> > > Various comments inline, and a patch for openssl(1) at the end that may
> > > simplify some things.
> > 
> > This is an updated version of the test including comments and wishes
> > from tb@ and bluhm@.
> > 
> > OK?
> 
> This currently drives openssl(1) for tests, which means that it is
> testing openssl(1), libssl and libcrypto, when what you're really
> wanting to test is libcrypto's verifier. While this works, the
> problem is that a change or breakage in libssl or openssl(1) results
> in a regress failure for libcrypto. If this is to land in its
> current form it really should be in regress/usr.bin/openssl -
> alternatively, it could be reworked to explicitly test libcrypto's
> APIs and remain here.
> 
> Some additional comments inline.

So, the following diff should meet all needs.

OK?

Thanks,
Jan

Index: usr.bin/openssl/Makefile
===
RCS file: /cvs/src/regress/usr.bin/openssl/Makefile,v
retrieving revision 1.6
diff -u -p -r1.6 Makefile
--- usr.bin/openssl/Makefile19 Mar 2018 03:41:40 -  1.6
+++ usr.bin/openssl/Makefile15 Feb 2021 20:37:11 -
@@ -1,6 +1,6 @@
 #  $OpenBSD: Makefile,v 1.6 2018/03/19 03:41:40 beck Exp $
 
-SUBDIR= options
+SUBDIR= options x509
 
 CLEANFILES+= testdsa.key testdsa.pem rsakey.pem rsacert.pem dsa512.pem
 CLEANFILES+= appstest_dir
Index: usr.bin/openssl/x509/Makefile
===
RCS file: usr.bin/openssl/x509/Makefile
diff -N usr.bin/openssl/x509/Makefile
--- /dev/null   1 Jan 1970 00:00:00 -
+++ usr.bin/openssl/x509/Makefile   16 Feb 2021 12:06:10 -
@@ -0,0 +1,129 @@
+# $OpenBSD$
+
+# Copyright (c) 2021 Jan Klemkow 
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This regression test is based on manual test descriptions from:
+# https://github.com/noxxi/libressl-tests
+
+# The following port must be installed for the regression tests:
+# p5-IO-Socket-SSL perl interface to SSL sockets
+
+PERL = perl
+OPENSSL ?= openssl
+
+PKG_REQUIRE != pkg_info -e 'p5-IO-Socket-SSL-*'
+.if empty (PKG_REQUIRE)
+regress:
+   @echo "missing package p5-IO-Socket-SSL"
+   @echo SKIPPED
+.endif
+
+REGRESS_TARGETS += test-inlabel-wildcard-cert-no-CA-client
+REGRESS_TARGETS += test-inlabel-wildcard-cert-CA-client
+REGRESS_TARGETS += test-common-wildcard-cert-no-CA-client
+REGRESS_TARGETS += test-common-wildcard-cert-CA-client
+REGRESS_TARGETS += test-verify-unusual-wildcard-cert
+REGRESS_TARGETS += test-openssl-verify-common

Re: LibreSSL regressions

2021-02-15 Thread Jan Klemkow
On Tue, Feb 16, 2021 at 04:36:59AM +1100, Joel Sing wrote:
> On 21-02-15 14:49:46, Jan Klemkow wrote:
> > +create-libressl-test-certs: create-libressl-test-certs.pl
> > +   ${PERL} ${.CURDIR}/$@.pl
> 
> We can see how this goes, however we may end up wanting to generate
> the certificates and commit them rather than regenerating on each
> run. The other advantage is that p5-IO-Socket-SSL would only be
> needed to regenerate the certificates and not actually run the
> tests.

What should I do?  Just commit the generated files and remove the Perl
script?

> > Index: regress/lib/libcrypto/validate/create-libressl-test-certs.pl
> > ===
> > RCS file: regress/lib/libcrypto/validate/create-libressl-test-certs.pl
> > diff -N regress/lib/libcrypto/validate/create-libressl-test-certs.pl
> > --- /dev/null   1 Jan 1970 00:00:00 -
> > +++ regress/lib/libcrypto/validate/create-libressl-test-certs.pl15 Feb 
> > 2021 12:54:58 -
> > @@ -0,0 +1,111 @@
> > +#!/usr/bin/perl
> > +
> > +# Copyright (c) 2021 Steffen Ullrich 
> > +# Public Domain
> > +
> > +use strict;
> > +use warnings;
> > +use IO::Socket::SSL::Utils;
> > +
> > +# primitive CA - ROOT
> > +my @ca = cert(
> > +CA => 1,
> > +subject => { CN => 'ROOT' }
> > +);
> > +out('caR.pem', pem(crt => $ca[0]));
> > +out('caR.key', pem(key => $ca[1]));
> > +
> > +# server certificate where SAN contains in-label wildcards which are 
> > allowed by
> > +# RFC 6125
> 
> It is worth noting that per the RFC, a client MAY allow in-label
> wildcards (this is not a MUST or even a SHOULD). Additionally,
> various software does not allow or support this (for example, libtls
> and hence ftp(1)).

What should I do here?

Thanks,
Jan



Re: LibreSSL regressions

2021-02-15 Thread Jan Klemkow
On Sat, Feb 13, 2021 at 03:53:48PM +0100, Theo Buehler wrote:
> On Sat, Feb 13, 2021 at 11:58:04AM +0100, Jan Klemkow wrote:
> > A coworker of mine has made tests with LibreSSL [1] and found some
> > regressions.  I took his test descriptions and created the following
> > automated regression test.  In the repository he described his findings
> > in detail.  I kept the numbers of the files and subtests in the target
> > names for now.  So, it's easier to match it with his files.
> > 
> > I don't know how to handle the result of "test-01-ssl".  That's why it's
> > just a comment.  Someone may have an idea to handle this properly.
> > 
> > Any comments, wishes or OK's?
> > 
> > [1]: https://github.com/noxxi/libressl-tests
> 
> First of all thanks for the effort!
> 
> The perl script and probably also the Makefile should have a license.
> 
> Please add a check that tests whether the required perl modules are
> installed (p5-IO-Socket-SSL and p5-Net-SSLeay) and otherwise prints
> SKIPPED and their names, so I can install them if they're not present.
> I never remember their exact capitalization and hyphenation...
> 
> Various comments inline, and a patch for openssl(1) at the end that may
> simplify some things.

This is an updated version of the test including comments and wishes
from tb@ and bluhm@.

OK?

Thanks,
Jan

Index: regress/lib/libcrypto/validate/Makefile
===
RCS file: regress/lib/libcrypto/validate/Makefile
diff -N regress/lib/libcrypto/validate/Makefile
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ regress/lib/libcrypto/validate/Makefile 15 Feb 2021 13:38:22 -
@@ -0,0 +1,133 @@
+# $OpenBSD$
+
+# Copyright (c) 2021 Jan Klemkow 
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This regression test is based on manual test descriptions from:
+# https://github.com/noxxi/libressl-tests
+
+# The following port must be installed for the regression tests:
+# p5-IO-Socket-SSL perl interface to SSL sockets
+
+PERL = perl
+OPENSSL ?= openssl
+
+PERL_REQUIRE !=perl -Mstrict -Mwarnings -e ' \
+eval { require IO::Socket::SSL } or print $@; \
+'
+.if ! empty (PERL_REQUIRE)
+regress:
+   @echo "${PERL_REQUIRE}"
+   @echo install these perl packages for additional tests
+   @echo SKIPPED
+.endif
+
+REGRESS_TARGETS += test-unusual-wildcard-cert-no-CA-client
+REGRESS_TARGETS += test-unusual-wildcard-cert-CA-client
+REGRESS_TARGETS += test-common-wildcard-cert-no-CA-client
+REGRESS_TARGETS += test-common wildcard-cert-CA-client
+REGRESS_TARGETS += test-verify-unusual-wildcard-cert
+REGRESS_TARGETS += test-openssl-verify-common-wildcard-cert
+REGRESS_TARGETS += test-chain-certificates-s_server
+REGRESS_TARGETS += test-alternative-chain
+REGRESS_CLEANUP =  cleanup-ssl
+REGRESS_SETUP_ONCE =   create-libressl-test-certs
+
+REGRESS_EXPECTED_FAILURES +=   test-unusual-wildcard-cert-no-CA-client
+REGRESS_EXPECTED_FAILURES +=   test-common-wildcard-cert-no-CA-client
+REGRESS_EXPECTED_FAILURES +=   test-common wildcard-cert-CA-client
+REGRESS_EXPECTED_FAILURES +=   test-verify-unusual-wildcard-cert
+REGRESS_EXPECTED_FAILURES +=   test-alternative-chain
+
+create-libressl-test-certs: create-libressl-test-certs.pl
+   ${PERL} ${.CURDIR}/$@.pl
+
+cleanup-ssl:
+   rm *.pem *.key
+
+test-unusual-wildcard-cert-no-CA-client:
+   # unusual wildcard cert, no CA given to client
+   # start server
+   ${OPENSSL} s_server -cert server-unusual-wildcard.pem \
+   -key server-unusual-wildcard.pem & \
+   timeout=$$(($$(date +%s) + 5)); \
+   while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \
+   do test $$(date +%s) -lt $$timeout || exit 1; done
+   # start client
+   echo "Q" | ${OPENSSL} s_client -verify_return_error \
+   | grep "Verify return code: 21"
+
+test-unusual-wildcard-cert-CA-client:
+   # unusual wildcard cert, CA given to client
+   # start server
+   ${OPENSSL} s_server -cert server-unusual-wildcard.pem \
+   -key ser

LibreSSL regressions

2021-02-13 Thread Jan Klemkow
Hi,

A coworker of mine has made tests with LibreSSL [1] and found some
regressions.  I took his test descriptions and created the following
automated regression test.  In the repository he described his findings
in detail.  I kept the numbers of the files and subtests in the target
names for now.  So, it's easier to match it with his files.

I don't know how to handle the result of "test-01-ssl".  That's why it's
just a comment.  Someone may have an idea to handle this properly.

Any comments, wishes or OK's?

bye,
Jan

[1]: https://github.com/noxxi/libressl-tests

Index: regress/lib/libssl/Makefile
===
RCS file: /cvs/src/regress/lib/libssl/Makefile,v
retrieving revision 1.42
diff -u -p -r1.42 Makefile
--- regress/lib/libssl/Makefile 14 Oct 2020 15:53:22 -  1.42
+++ regress/lib/libssl/Makefile 12 Feb 2021 19:42:56 -
@@ -16,6 +16,7 @@ SUBDIR += tlsext
 SUBDIR += tlslegacy
 SUBDIR += key_schedule
 SUBDIR += unit
+SUBDIR += validate
 
 # Things that take a long time should go below here. 
 SUBDIR += tlsfuzzer
Index: regress/lib/libssl/validate/Makefile
===
RCS file: regress/lib/libssl/validate/Makefile
diff -N regress/lib/libssl/validate/Makefile
--- /dev/null   1 Jan 1970 00:00:00 -
+++ regress/lib/libssl/validate/Makefile13 Feb 2021 10:50:30 -
@@ -0,0 +1,104 @@
+# Tests from: https://github.com/noxxi/libressl-tests
+
+PERL=perl
+
+REGRESS_TARGETS =  test-00-01-ssl
+REGRESS_TARGETS += test-00-02-ssl
+REGRESS_TARGETS += test-00-03-ssl
+REGRESS_TARGETS += test-00-04-ssl
+REGRESS_TARGETS += test-00-05-ssl
+REGRESS_TARGETS += test-00-06-ssl
+REGRESS_TARGETS += test-01-ssl
+REGRESS_TARGETS += test-02-ssl
+REGRESS_ROOT_TARGETS = ${REGRESS_TARGETS}
+REGRESS_CLEANUP =  cleanup-ssl
+REGRESS_SETUP =create-libressl-test-certs
+
+create-libressl-test-certs: create-libressl-test-certs.pl
+   ${PERL} ${.CURDIR}/$@.pl
+
+cleanup-ssl:
+   pkill openssl || true
+   rm *.pem *.key
+
+test-00-01-ssl:
+   # unusual wildcard cert, no CA given to client
+   # cleanup
+   pkill openssl || true
+   sleep 2
+   # start server
+   ${KTRACE} openssl s_server -cert server-unusual-wildcard.pem \
+   -key server-unusual-wildcard.pem -www & \
+   timeout=$$(($$(date +%s) + 5)); \
+   while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \
+   do test $$(date +%s) -lt $$timeout || exit 1; done
+   # start client
+   echo "data" | openssl s_client -verify_return_error -connect 
127.0.0.1:4433 \
+   | grep "Verify return code: 21"
+
+test-00-02-ssl:
+   # unusual wildcard cert, CA given to client
+   # cleanup
+   pkill openssl || true
+   sleep 2
+   # start server
+   ${KTRACE} openssl s_server -cert server-unusual-wildcard.pem \
+   -key server-unusual-wildcard.pem -www & \
+   timeout=$$(($$(date +%s) + 5)); \
+   while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \
+   do test $$(date +%s) -lt $$timeout || exit 1; done
+   # start client
+   echo "data" | openssl s_client -connect 127.0.0.1:4433 -CAfile caR.pem \
+   | grep "Verify return code: 0"
+
+test-00-03-ssl:
+   # common wildcard cert, no CA given to client
+   # cleanup
+   pkill openssl || true
+   sleep 2
+   # start server
+   ${KTRACE} openssl s_server -cert server-common-wildcard.pem \
+   -key server-common-wildcard.pem -www & \
+   timeout=$$(($$(date +%s) + 5)); \
+   while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \
+   do test $$(date +%s) -lt $$timeout || exit 1; done
+   # start client
+   echo "data" | openssl s_client -connect 127.0.0.1:4433 \
+   | grep "Verify return code: 21"
+
+test-00-04-ssl:
+   # common wildcard cert, CA given to client
+   # cleanup
+   pkill openssl || true
+   sleep 2
+   # start server
+   ${KTRACE} openssl s_server -cert server-unusual-wildcard.pem \
+   -key server-unusual-wildcard.pem -www & \
+   timeout=$$(($$(date +%s) + 5)); \
+   while fstat -p $$! | ! grep -q 'tcp .* \*:4433$$'; \
+   do test $$(date +%s) -lt $$timeout || exit 1; done
+   # start client
+   echo "data" | openssl s_client -connect 127.0.0.1:4433 -CAfile caR.pem \
+   | grep "Verify return code: 21"
+
+test-00-05-ssl:
+   # openssl verify, unusual wildcard cert
+   openssl verify -CAfile caR.pem server-unusual-wildcard.pem \
+   | grep "server-unusual-wildcard.pem: OK"
+
+test-00-06-ssl:
+   # openssl verify, common wildcard cert
+   openssl verify -CAfile caR.pem server-common-wildcard.pem \
+   | grep "server-common-wildcard.pem: OK"
+
+test-01-ssl:
+   # Not all chain certificates are sent in s_server
+   # o

Re: diff: tcp ack improvement

2021-02-08 Thread Jan Klemkow
On Mon, Feb 08, 2021 at 03:42:54PM +0100, Alexander Bluhm wrote:
> On Wed, Feb 03, 2021 at 11:20:04AM +0100, Claudio Jeker wrote:
> > Just commit it. OK claudio@
> > If people see problems we can back it out again.
> 
> This has huge impact on TCP performance.
> 
> http://bluhm.genua.de/perform/results/2021-02-07T00%3A01%3A40Z/perform.html
> 
> For a single TCP connection between two OpenBSD boxes, throughput
> drops by 77% from 3.1 GBit/sec to 710 MBit/sec.  But with 100
> parallel connections the throughput overall increases by 5%.

For single connections our kernel is limited to send out 4 max TCP
segments.  I didn't see that, because I just measured with 10 and 30
streams in parallel.

FreeBSD disabled it 20 years ago.
https://github.com/freebsd/freebsd-src/commit/d912c694ee00de5ea0f46743295a0fc603cab562

I would suggest removing the whole feature.

bye,
Jan

Index: tcp.h
===
RCS file: /cvs/src/sys/netinet/tcp.h,v
retrieving revision 1.21
diff -u -p -r1.21 tcp.h
--- tcp.h   10 Jul 2019 18:45:31 -  1.21
+++ tcp.h   8 Feb 2021 17:52:38 -
@@ -105,8 +105,6 @@ struct tcphdr {
 #defineTCP_MAX_SACK3   /* Max # SACKs sent in any segment */
 #defineTCP_SACKHOLE_LIMIT 128  /* Max # SACK holes per connection */
 
-#defineTCP_MAXBURST4   /* Max # packets after leaving Fast 
Rxmit */
-
 /*
  * Default maximum segment size for TCP.
  * With an IP MSS of 576, this is 536,
Index: tcp_output.c
===
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.129
diff -u -p -r1.129 tcp_output.c
--- tcp_output.c25 Jan 2021 03:40:46 -  1.129
+++ tcp_output.c8 Feb 2021 17:53:07 -
@@ -203,7 +203,6 @@ tcp_output(struct tcpcb *tp)
int idle, sendalot = 0;
int i, sack_rxmit = 0;
struct sackhole *p;
-   int maxburst = TCP_MAXBURST;
 #ifdef TCP_SIGNATURE
unsigned int sigoff;
 #endif /* TCP_SIGNATURE */
@@ -1120,7 +1119,7 @@ out:
tp->last_ack_sent = tp->rcv_nxt;
tp->t_flags &= ~TF_ACKNOW;
TCP_TIMER_DISARM(tp, TCPT_DELACK);
-   if (sendalot && --maxburst)
+   if (sendalot)
goto again;
return (0);
 }



  1   2   3   >