Re: Fix loadavg(3)
> > Date: Mon, 14 Nov 2016 13:11:58 +0100 > > From: Martin Pieuchot> > > > On 12/11/16(Sat) 15:52, patrick keshishian wrote: > > > Ahh... seems the culprit is softclock_thread added 2016/09/22 > > > (kern/kern_timeout.c mpi@). > > > > I'd suggest we simply skip kernel thread when calculating the load. > > Hmm, I'd say that counting runnable kernel threads towards the load is > the right thing to do. > > > Since we're slowly moving code executed in software interrupt > > context to kernel threads this will keep the original behavior. > > A different way of viewing is that this move now enables us to > properly take into account the work done by the network stack. Ah, it is Patrick again, wanting low load averages rather than accurate load averages. Not understanding the loadavg comes without any promise as looks. The load average is an ABSTRACT measurement of work that is queued to be done. As more parts of the kernel become desynchronized from the big lock, that number will change. If you want the old load average number, run old code with the old behaviours. I agree with Mark. The kernel threads should be counted.
pf af-to route output
Hi, The !r->rt case is only used by af-to. pf_route6() calls ip6_output() to do the work while pf_route() has some custom implementation for that. It is simpler to call ip_output() or ip6_output() from pf_test() directly. ok? bluhm Index: net/pf.c === RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v retrieving revision 1.998 diff -u -p -r1.998 pf.c --- net/pf.c14 Nov 2016 13:25:00 - 1.998 +++ net/pf.c14 Nov 2016 14:08:57 - @@ -5820,50 +5820,34 @@ pf_route(struct pf_pdesc *pd, struct pf_ dst->sin_addr = ip->ip_dst; rtableid = m0->m_pkthdr.ph_rtableid; - if (!r->rt) { - rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); - if (rt == NULL) { - ipstat_inc(ips_noroute); + if (s == NULL) { + bzero(sns, sizeof(sns)); + if (pf_map_addr(AF_INET, r, + (struct pf_addr *)>ip_src, + , NULL, sns, >route, PF_SN_ROUTE)) { + DPFPRINTF(LOG_ERR, + "pf_route: pf_map_addr() failed."); goto bad; } - ifp = if_get(rt->rt_ifidx); - - if (rt->rt_flags & RTF_GATEWAY) - dst = satosin(rt->rt_gateway); - - m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; + if (!PF_AZERO(, AF_INET)) + dst->sin_addr.s_addr = naddr.v4.s_addr; + ifp = r->route.kif ? + r->route.kif->pfik_ifp : NULL; } else { - if (s == NULL) { - bzero(sns, sizeof(sns)); - if (pf_map_addr(AF_INET, r, - (struct pf_addr *)>ip_src, - , NULL, sns, >route, PF_SN_ROUTE)) { - DPFPRINTF(LOG_ERR, - "pf_route: pf_map_addr() failed."); - goto bad; - } - - if (!PF_AZERO(, AF_INET)) - dst->sin_addr.s_addr = naddr.v4.s_addr; - ifp = r->route.kif ? - r->route.kif->pfik_ifp : NULL; - } else { - if (!PF_AZERO(>rt_addr, AF_INET)) - dst->sin_addr.s_addr = - s->rt_addr.v4.s_addr; - ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; - } - - rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); - if (rt == NULL) { - ipstat_inc(ips_noroute); - goto bad; - } + if (!PF_AZERO(>rt_addr, AF_INET)) + dst->sin_addr.s_addr = + s->rt_addr.v4.s_addr; + ifp = s->rt_kif ? 
s->rt_kif->pfik_ifp : NULL; } if (ifp == NULL) goto bad; + rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid); + if (rt == NULL) { + ipstat_inc(ips_noroute); + goto bad; + } if (pd->kif->pfik_ifp != ifp) { if (pf_test(AF_INET, PF_OUT, ifp, ) != PF_PASS) @@ -5928,8 +5912,6 @@ pf_route(struct pf_pdesc *pd, struct pf_ done: if (r->rt != PF_DUPTO) pd->m = NULL; - if (!r->rt) - if_put(ifp); rtfree(rt); return; @@ -5982,12 +5964,6 @@ pf_route6(struct pf_pdesc *pd, struct pf dst->sin6_addr = ip6->ip6_dst; rtableid = m0->m_pkthdr.ph_rtableid; - if (!r->rt) { - m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; - ip6_output(m0, NULL, NULL, 0, NULL, NULL); - goto done; - } - if (s == NULL) { bzero(sns, sizeof(sns)); if (pf_map_addr(AF_INET6, r, (struct pf_addr *)>ip6_src, @@ -6908,10 +6884,28 @@ done: action = PF_DROP; break; } - if (pd.naf == AF_INET) - pf_route(, r, s); - if (pd.naf == AF_INET6) - pf_route6(, r, s); + if (r->rt) { + switch (pd.naf) { + case AF_INET: + pf_route(, r, s); + break; + case AF_INET6: + pf_route6(, r, s); + break; + } + } + if (pd.m) { + pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; + switch (pd.naf) { + case AF_INET: + ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); + break; +
Re: Fix loadavg(3)
> Date: Mon, 14 Nov 2016 13:11:58 +0100 > From: Martin Pieuchot> > On 12/11/16(Sat) 15:52, patrick keshishian wrote: > > Ahh... seems the culprit is softclock_thread added 2016/09/22 > > (kern/kern_timeout.c mpi@). > > I'd suggest we simply skip kernel thread when calculating the load. Hmm, I'd say that counting runnable kernel threads towards the load is the right thing to do. > Since we're slowly moving code executed in software interrupt > context to kernel threads this will keep the original behavior. A different way of viewing is that this move now enables us to properly take into account the work done by the network stack. > Index: uvm/uvm_meter.c > === > RCS file: /cvs/src/sys/uvm/uvm_meter.c,v > retrieving revision 1.36 > diff -u -p -r1.36 uvm_meter.c > --- uvm/uvm_meter.c 14 Mar 2015 03:38:53 - 1.36 > +++ uvm/uvm_meter.c 14 Nov 2016 12:08:11 - > @@ -107,6 +107,9 @@ uvm_loadav(struct loadavg *avg) > memset(nrun_cpu, 0, sizeof(nrun_cpu)); > > LIST_FOREACH(p, , p_list) { > + if (p->p_flag & P_SYSTEM) > + continue; > + > switch (p->p_stat) { > case SSLEEP: > if (p->p_priority > PZERO || p->p_slptime > 1) > @@ -114,8 +117,6 @@ uvm_loadav(struct loadavg *avg) > /* FALLTHROUGH */ > case SRUN: > case SONPROC: > - if (p == p->p_cpu->ci_schedstate.spc_idleproc) > - continue; > case SIDL: > nrun++; > if (p->p_cpu) > @@ -136,7 +137,7 @@ uvm_loadav(struct loadavg *avg) > spc->spc_ldavg = (cexp[0] * spc->spc_ldavg + > nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE * > (FSCALE - cexp[0])) >> FSHIFT; > - } > + } > } > > /* > >
Fix loadavg(3)
On 12/11/16(Sat) 15:52, patrick keshishian wrote: > Ahh... seems the culprit is softclock_thread added 2016/09/22 > (kern/kern_timeout.c mpi@). I'd suggest we simply skip kernel thread when calculating the load. Since we're slowly moving code executed in software interrupt context to kernel threads this will keep the original behavior. ok? Index: uvm/uvm_meter.c === RCS file: /cvs/src/sys/uvm/uvm_meter.c,v retrieving revision 1.36 diff -u -p -r1.36 uvm_meter.c --- uvm/uvm_meter.c 14 Mar 2015 03:38:53 - 1.36 +++ uvm/uvm_meter.c 14 Nov 2016 12:08:11 - @@ -107,6 +107,9 @@ uvm_loadav(struct loadavg *avg) memset(nrun_cpu, 0, sizeof(nrun_cpu)); LIST_FOREACH(p, , p_list) { + if (p->p_flag & P_SYSTEM) + continue; + switch (p->p_stat) { case SSLEEP: if (p->p_priority > PZERO || p->p_slptime > 1) @@ -114,8 +117,6 @@ uvm_loadav(struct loadavg *avg) /* FALLTHROUGH */ case SRUN: case SONPROC: - if (p == p->p_cpu->ci_schedstate.spc_idleproc) - continue; case SIDL: nrun++; if (p->p_cpu) @@ -136,7 +137,7 @@ uvm_loadav(struct loadavg *avg) spc->spc_ldavg = (cexp[0] * spc->spc_ldavg + nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE * (FSCALE - cexp[0])) >> FSHIFT; - } + } } /*
Re: use per cpu counters for udp statistics
On 14 November 2016 at 09:38, Martin Pieuchot wrote: > On 14/11/16(Mon) 14:29, David Gwynne wrote: >> its as close to the ipstat change as i can make it. >> >> ok? > > [...] > >> @@ -142,6 +143,7 @@ void udp_notify(struct inpcb *, int); >> void >> udp_init(void) >> { >> + udpcounters = counters_alloc(udps_ncounters, M_COUNTERS); > > No need to pass M_COUNTERS to counter_alloc(), it's repetitive :) > Yes, I've also pointed that out in my previous mail for IP stats. > Other than that ok mpi@ > Looks good to me as well. OK mikeb
Re: umb: NCM datagram pointer entries
> Date: Mon, 14 Nov 2016 10:51:03 +0100 > From: Gerhard Roth> > Hi, > > according to the NCM spec, the list of datagram pointer entries has to > be terminated with an entry where wDatagramIndex and wDatagramLen are > zero. Not all implementations seem to follow that rule: otto@ had one > that only sets the index to zero while using an arbitrary length value. > > The patch below fixes the parsing to stop if any of those values is > zero. It was successfully tested by otto@ Looks reasonable to me; ok kettenis@ > Index: if_umb.c > === > RCS file: /cvs/src/sys/dev/usb/if_umb.c,v > retrieving revision 1.5 > diff -u -p -u -p -r1.5 if_umb.c > --- if_umb.c 10 Nov 2016 14:45:43 - 1.5 > +++ if_umb.c 14 Nov 2016 09:34:29 - > @@ -1815,7 +1815,7 @@ umb_decap(struct umb_softc *sc, struct u > } > > /* Terminating zero entry */ > - if (dlen == 0 && doff == 0) > + if (dlen == 0 || doff == 0) > break; > if (len < dlen + doff) { > /* Skip giant datagram but continue processing */ > >
umb: NCM datagram pointer entries
Hi, according to the NCM spec, the list of datagram pointer entries has to be terminated with an entry where wDatagramIndex and wDatagramLen are zero. Not all implementations seem to follow that rule: otto@ had one that only sets the index to zero while using an arbitrary length value. The patch below fixes the parsing to stop if any of those values is zero. It was successfully tested by otto@ Gerhard Index: if_umb.c === RCS file: /cvs/src/sys/dev/usb/if_umb.c,v retrieving revision 1.5 diff -u -p -u -p -r1.5 if_umb.c --- if_umb.c10 Nov 2016 14:45:43 - 1.5 +++ if_umb.c14 Nov 2016 09:34:29 - @@ -1815,7 +1815,7 @@ umb_decap(struct umb_softc *sc, struct u } /* Terminating zero entry */ - if (dlen == 0 && doff == 0) + if (dlen == 0 || doff == 0) break; if (len < dlen + doff) { /* Skip giant datagram but continue processing */
Re: bpf_mtap(9) w/o KERNEL_LOCK
On 13/09/16(Tue) 12:23, Martin Pieuchot wrote: > Here's the big scary diff I've been using for some months now to stop > grabbing the KERNEL_LOCK() in bpf_mtap(9). This has been originally > written to prevent lock ordering inside pf_test(). Now that we're > heading toward using a rwlock, we won't have this problem, but fewer > usages of KERNEL_LOCK() is still interesting. > > I'm going to split this diff in small chunks to ease the review. But > I'd appreciate if people could try to break it, test & report back. > > Some notes: > > - Now that selwakeup() is called in a thread context (task) we only > rely on the KERNEL_LOCK() to serialize access to kqueue(9) data. > > - The reference counting is here to make sure a descriptor is not > freed during a sleep. That's why the KERNEL_LOCK() is still > necessary in the slow path. On the other hand bpf_catchpacket() > relies on the reference guaranteed by the SRP list. > > - A mutex now protects the rotating buffers and their associated > fields. It is dropped before calling ifpromisc() because USB > devices sleep. > > - The dance around uiomove(9) is here to check that buffers aren't > rotated while data is copied to userland. Setting ``b->bd_fbuf'' > to NULL should be enough to let bpf_catchpacket() drop the > packet. But I added ``__in_uiomove'' to be able to have usable > panic if something weird happens. > > Comments? I've got many test reports but no actual review... anyone? Here's another extracted diff to move forward. Let bpf_allocbufs() fail when allocating memory, this way we can call it while holding a mutex. ok? 
Index: net/bpf.c === RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.150 diff -u -p -r1.150 bpf.c --- net/bpf.c 16 Oct 2016 18:05:41 - 1.150 +++ net/bpf.c 14 Nov 2016 09:02:51 - @@ -92,7 +92,7 @@ int bpf_maxbufsize = BPF_MAXBUFSIZE; struct bpf_if *bpf_iflist; LIST_HEAD(, bpf_d) bpf_d_list; -void bpf_allocbufs(struct bpf_d *); +intbpf_allocbufs(struct bpf_d *); void bpf_ifname(struct ifnet *, struct ifreq *); int_bpf_mtap(caddr_t, const struct mbuf *, u_int, void (*)(const void *, void *, size_t)); @@ -996,6 +996,7 @@ int bpf_setif(struct bpf_d *d, struct ifreq *ifr) { struct bpf_if *bp, *candidate = NULL; + int error = 0; int s; /* @@ -1012,30 +1013,33 @@ bpf_setif(struct bpf_d *d, struct ifreq candidate = bp; } - if (candidate != NULL) { - /* -* Allocate the packet buffers if we need to. -* If we're already attached to requested interface, -* just flush the buffer. -*/ - if (d->bd_sbuf == NULL) - bpf_allocbufs(d); - s = splnet(); - if (candidate != d->bd_bif) { - if (d->bd_bif) - /* -* Detach if attached to something else. -*/ - bpf_detachd(d); + /* Not found. */ + if (candidate == NULL) + return (ENXIO); - bpf_attachd(d, candidate); - } - bpf_reset_d(d); - splx(s); - return (0); + /* +* Allocate the packet buffers if we need to. +* If we're already attached to requested interface, +* just flush the buffer. +*/ + s = splnet(); + if (d->bd_sbuf == NULL) { + if ((error = bpf_allocbufs(d))) + goto out; } - /* Not found. */ - return (ENXIO); + if (candidate != d->bd_bif) { + if (d->bd_bif) + /* +* Detach if attached to something else. +*/ + bpf_detachd(d); + + bpf_attachd(d, candidate); + } + bpf_reset_d(d); +out: + splx(s); + return (error); } /* @@ -1434,13 +1438,23 @@ bpf_catchpacket(struct bpf_d *d, u_char /* * Initialize all nonzero fields of a descriptor. 
*/ -void +int bpf_allocbufs(struct bpf_d *d) { - d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); - d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); + d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); + if (d->bd_fbuf == NULL) + return (ENOMEM); + + d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); + if (d->bd_sbuf == NULL) { + free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize); + return (ENOMEM); + } + d->bd_slen = 0; d->bd_hlen = 0; + + return (0); } void
Re: use per cpu counters for udp statistics
On 14/11/16(Mon) 14:29, David Gwynne wrote: > its as close to the ipstat change as i can make it. > > ok? [...] > @@ -142,6 +143,7 @@ void udp_notify(struct inpcb *, int); > void > udp_init(void) > { > + udpcounters = counters_alloc(udps_ncounters, M_COUNTERS); No need to pass M_COUNTERS to counter_alloc(), it's repetitive :) Other than that ok mpi@