Re: Fix loadavg(3)

2016-11-14 Thread Theo de Raadt
> > Date: Mon, 14 Nov 2016 13:11:58 +0100
> > From: Martin Pieuchot 
> > 
> > On 12/11/16(Sat) 15:52, patrick keshishian wrote:
> > > Ahh... seems the culprit is softclock_thread added 2016/09/22
> > > (kern/kern_timeout.c mpi@).
> > 
> > I'd suggest we simply skip kernel threads when calculating the load.
> 
> Hmm, I'd say that counting runnable kernel threads towards the load is
> the right thing to do.
> 
> > Since we're slowly moving code executed in software interrupt
> > context to kernel threads, this will keep the original behavior.
> 
> A different way of viewing it is that this move now enables us to
> properly take into account the work done by the network stack.

Ah, it is Patrick again, wanting low load averages rather than
accurate load averages.  Not understanding that the loadavg comes
without any promise about how it looks.

The load average is an ABSTRACT measurement of work that is queued
to be done.  As more parts of the kernel become desynchronized from
the big lock, that number will change.
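
For reference, the number is an exponential moving average of the
runnable-thread count, sampled every 5 seconds.  A floating-point
sketch of one update step, for illustration only; uvm_loadav() does
the same thing in fixed point with FSCALE/FSHIFT:

    #include <math.h>

    /*
     * One update step of the load average.  For the 1-minute figure
     * interval_min = 1, so decay = exp(-5/60) ~ 0.92, which matches
     * cexp[0] in uvm/uvm_meter.c.
     */
    double
    loadav_update(double old, int nrun, double interval_min)
    {
            double decay = exp(-5.0 / (interval_min * 60.0));

            return (old * decay + (double)nrun * (1.0 - decay));
    }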

If you want the old load average number, run old code with the old
behaviours.

I agree with Mark.  The kernel threads should be counted.



pf af-to route output

2016-11-14 Thread Alexander Bluhm
Hi,

The !r->rt case is only used by af-to.  pf_route6() calls ip6_output()
to do the work, while pf_route() has its own custom implementation for
that.  It is simpler to call ip_output() or ip6_output() from
pf_test() directly.
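
In pf_test() that boils down to a small dispatch once the routing
functions stop handling !r->rt.  A sketch of the intended shape, with
names taken from the diff below; the AF_INET6 arm and the final pd.m
reset are assumptions, since the posted hunk is cut off in the
archive:

    if (pd.m != NULL) {
            pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
            switch (pd.naf) {
            case AF_INET:
                    ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
                    break;
            case AF_INET6:
                    ip6_output(pd.m, NULL, NULL, 0, NULL, NULL);
                    break;
            }
            pd.m = NULL;
    }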

ok?

bluhm

Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.998
diff -u -p -r1.998 pf.c
--- net/pf.c  14 Nov 2016 13:25:00 -0000  1.998
+++ net/pf.c  14 Nov 2016 14:08:57 -0000
@@ -5820,50 +5820,34 @@ pf_route(struct pf_pdesc *pd, struct pf_
dst->sin_addr = ip->ip_dst;
rtableid = m0->m_pkthdr.ph_rtableid;
 
-   if (!r->rt) {
-   rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
-   if (rt == NULL) {
-   ipstat_inc(ips_noroute);
+   if (s == NULL) {
+   bzero(sns, sizeof(sns));
+   if (pf_map_addr(AF_INET, r,
+   (struct pf_addr *)&ip->ip_src,
+   &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
+   DPFPRINTF(LOG_ERR,
+   "pf_route: pf_map_addr() failed.");
goto bad;
}
 
-   ifp = if_get(rt->rt_ifidx);
-
-   if (rt->rt_flags & RTF_GATEWAY)
-   dst = satosin(rt->rt_gateway);
-
-   m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+   if (!PF_AZERO(&naddr, AF_INET))
+   dst->sin_addr.s_addr = naddr.v4.s_addr;
+   ifp = r->route.kif ?
+   r->route.kif->pfik_ifp : NULL;
} else {
-   if (s == NULL) {
-   bzero(sns, sizeof(sns));
-   if (pf_map_addr(AF_INET, r,
-   (struct pf_addr *)&ip->ip_src,
-   &naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
-   DPFPRINTF(LOG_ERR,
-   "pf_route: pf_map_addr() failed.");
-   goto bad;
-   }
-
-   if (!PF_AZERO(&naddr, AF_INET))
-   dst->sin_addr.s_addr = naddr.v4.s_addr;
-   ifp = r->route.kif ?
-   r->route.kif->pfik_ifp : NULL;
-   } else {
-   if (!PF_AZERO(&s->rt_addr, AF_INET))
-   dst->sin_addr.s_addr =
-   s->rt_addr.v4.s_addr;
-   ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
-   }
-
-   rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
-   if (rt == NULL) {
-   ipstat_inc(ips_noroute);
-   goto bad;
-   }
+   if (!PF_AZERO(&s->rt_addr, AF_INET))
+   dst->sin_addr.s_addr =
+   s->rt_addr.v4.s_addr;
+   ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
}
if (ifp == NULL)
goto bad;
 
+   rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
+   if (rt == NULL) {
+   ipstat_inc(ips_noroute);
+   goto bad;
+   }
 
if (pd->kif->pfik_ifp != ifp) {
if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
@@ -5928,8 +5912,6 @@ pf_route(struct pf_pdesc *pd, struct pf_
 done:
if (r->rt != PF_DUPTO)
pd->m = NULL;
-   if (!r->rt)
-   if_put(ifp);
rtfree(rt);
return;
 
@@ -5982,12 +5964,6 @@ pf_route6(struct pf_pdesc *pd, struct pf
dst->sin6_addr = ip6->ip6_dst;
rtableid = m0->m_pkthdr.ph_rtableid;
 
-   if (!r->rt) {
-   m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
-   ip6_output(m0, NULL, NULL, 0, NULL, NULL);
-   goto done;
-   }
-
if (s == NULL) {
bzero(sns, sizeof(sns));
if (pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
@@ -6908,10 +6884,28 @@ done:
action = PF_DROP;
break;
}
-   if (pd.naf == AF_INET)
-   pf_route(&pd, r, s);
-   if (pd.naf == AF_INET6)
-   pf_route6(&pd, r, s);
+   if (r->rt) {
+   switch (pd.naf) {
+   case AF_INET:
+   pf_route(&pd, r, s);
+   break;
+   case AF_INET6:
+   pf_route6(&pd, r, s);
+   break;
+   }
+   }
+   if (pd.m) {
+   pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+   switch (pd.naf) {
+   case AF_INET:
+   ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
+   break;
+   

Re: Fix loadavg(3)

2016-11-14 Thread Mark Kettenis
> Date: Mon, 14 Nov 2016 13:11:58 +0100
> From: Martin Pieuchot 
> 
> On 12/11/16(Sat) 15:52, patrick keshishian wrote:
> > Ahh... seems the culprit is softclock_thread added 2016/09/22
> > (kern/kern_timeout.c mpi@).
> 
> I'd suggest we simply skip kernel threads when calculating the load.

Hmm, I'd say that counting runnable kernel threads towards the load is
the right thing to do.

> Since we're slowly moving code executed in software interrupt
> context to kernel threads, this will keep the original behavior.

A different way of viewing it is that this move now enables us to
properly take into account the work done by the network stack.


> Index: uvm/uvm_meter.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 uvm_meter.c
> --- uvm/uvm_meter.c   14 Mar 2015 03:38:53 -0000  1.36
> +++ uvm/uvm_meter.c   14 Nov 2016 12:08:11 -0000
> @@ -107,6 +107,9 @@ uvm_loadav(struct loadavg *avg)
>   memset(nrun_cpu, 0, sizeof(nrun_cpu));
>  
>   LIST_FOREACH(p, &allproc, p_list) {
> + if (p->p_flag & P_SYSTEM)
> + continue;
> +
>   switch (p->p_stat) {
>   case SSLEEP:
>   if (p->p_priority > PZERO || p->p_slptime > 1)
> @@ -114,8 +117,6 @@ uvm_loadav(struct loadavg *avg)
>   /* FALLTHROUGH */
>   case SRUN:
>   case SONPROC:
> - if (p == p->p_cpu->ci_schedstate.spc_idleproc)
> - continue;
>   case SIDL:
>   nrun++;
>   if (p->p_cpu)
> @@ -136,7 +137,7 @@ uvm_loadav(struct loadavg *avg)
>   spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
>   nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE *
>   (FSCALE - cexp[0])) >> FSHIFT;
> - }   
> + }
>  }
>  
>  /*
> 
> 



Fix loadavg(3)

2016-11-14 Thread Martin Pieuchot
On 12/11/16(Sat) 15:52, patrick keshishian wrote:
> Ahh... seems the culprit is softclock_thread added 2016/09/22
> (kern/kern_timeout.c mpi@).

I'd suggest we simply skip kernel threads when calculating the load.

Since we're slowly moving code executed in software interrupt
context to kernel threads, this will keep the original behavior.

ok?

Index: uvm/uvm_meter.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.36
diff -u -p -r1.36 uvm_meter.c
--- uvm/uvm_meter.c 14 Mar 2015 03:38:53 -0000  1.36
+++ uvm/uvm_meter.c 14 Nov 2016 12:08:11 -0000
@@ -107,6 +107,9 @@ uvm_loadav(struct loadavg *avg)
memset(nrun_cpu, 0, sizeof(nrun_cpu));
 
LIST_FOREACH(p, &allproc, p_list) {
+   if (p->p_flag & P_SYSTEM)
+   continue;
+
switch (p->p_stat) {
case SSLEEP:
if (p->p_priority > PZERO || p->p_slptime > 1)
@@ -114,8 +117,6 @@ uvm_loadav(struct loadavg *avg)
/* FALLTHROUGH */
case SRUN:
case SONPROC:
-   if (p == p->p_cpu->ci_schedstate.spc_idleproc)
-   continue;
case SIDL:
nrun++;
if (p->p_cpu)
@@ -136,7 +137,7 @@ uvm_loadav(struct loadavg *avg)
spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE *
(FSCALE - cexp[0])) >> FSHIFT;
-   }   
+   }
 }
 
 /*



Re: use per cpu counters for udp statistics

2016-11-14 Thread Mike Belopuhov
On 14 November 2016 at 09:38, Martin Pieuchot  wrote:
> On 14/11/16(Mon) 14:29, David Gwynne wrote:
>> its as close to the ipstat change as i can make it.
>>
>> ok?
>
> [...]
>
>> @@ -142,6 +143,7 @@ void  udp_notify(struct inpcb *, int);
>>  void
>>  udp_init(void)
>>  {
>> + udpcounters = counters_alloc(udps_ncounters, M_COUNTERS);
>
> No need to pass M_COUNTERS to counters_alloc(), it's repetitive :)
>

Yes, I've also pointed that out in my previous mail for IP stats.

> Other than that ok mpi@
>

Looks good to me as well. OK mikeb



Re: umb: NCM datagram pointer entries

2016-11-14 Thread Mark Kettenis
> Date: Mon, 14 Nov 2016 10:51:03 +0100
> From: Gerhard Roth 
> 
> Hi,
> 
> according to the NCM spec, the list of datagram pointer entries has to
> be terminated with an entry where wDatagramIndex and wDatagramLen are
> zero. Not all implementations seem to follow that rule: otto@ had one
> that only sets the index to zero while using an arbitrary length value.
> 
> The patch below fixes the parsing to stop if either of those values is
> zero. It was successfully tested by otto@

Looks reasonable to me; ok kettenis@

> Index: if_umb.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
> retrieving revision 1.5
> diff -u -p -u -p -r1.5 if_umb.c
> --- if_umb.c  10 Nov 2016 14:45:43 -0000  1.5
> +++ if_umb.c  14 Nov 2016 09:34:29 -0000
> @@ -1815,7 +1815,7 @@ umb_decap(struct umb_softc *sc, struct u
>   }
>  
>   /* Terminating zero entry */
> - if (dlen == 0 && doff == 0)
> + if (dlen == 0 || doff == 0)
>   break;
>   if (len < dlen + doff) {
>   /* Skip giant datagram but continue processing */
> 
> 



umb: NCM datagram pointer entries

2016-11-14 Thread Gerhard Roth
Hi,

according to the NCM spec, the list of datagram pointer entries has to
be terminated with an entry where wDatagramIndex and wDatagramLen are
zero. Not all implementations seem to follow that rule: otto@ had one
that only sets the index to zero while using an arbitrary length value.
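
For context: an NDP16 datagram pointer entry is just a pair of 16-bit
offset/length values, so the parse loop ends up looking roughly like
the sketch below (struct and function names are illustrative, not
copied from if_umb.c):

    #include <sys/types.h>
    #include <sys/endian.h>

    /* NCM NDP16 datagram pointer entry, per the spec. */
    struct ndp16_dgram {
            uint16_t wDatagramIndex;        /* offset of datagram in NTB */
            uint16_t wDatagramLen;          /* length of datagram */
    };

    /*
     * Walk the pointer entries.  The spec terminates the list with an
     * all-zero entry, but some devices only zero the index, so treat
     * a zero in either field as the terminator.
     */
    static void
    walk_dgrams(const struct ndp16_dgram *dp, int nent)
    {
            int i;

            for (i = 0; i < nent; i++) {
                    uint16_t doff = letoh16(dp[i].wDatagramIndex);
                    uint16_t dlen = letoh16(dp[i].wDatagramLen);

                    if (dlen == 0 || doff == 0)
                            break;  /* terminating entry */
                    /* ... hand datagram at [doff, doff+dlen) up ... */
            }
    }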

The patch below fixes the parsing to stop if either of those values is
zero. It was successfully tested by otto@

Gerhard


Index: if_umb.c
===================================================================
RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
retrieving revision 1.5
diff -u -p -u -p -r1.5 if_umb.c
--- if_umb.c    10 Nov 2016 14:45:43 -0000  1.5
+++ if_umb.c    14 Nov 2016 09:34:29 -0000
@@ -1815,7 +1815,7 @@ umb_decap(struct umb_softc *sc, struct u
}
 
/* Terminating zero entry */
-   if (dlen == 0 && doff == 0)
+   if (dlen == 0 || doff == 0)
break;
if (len < dlen + doff) {
/* Skip giant datagram but continue processing */



Re: bpf_mtap(9) w/o KERNEL_LOCK

2016-11-14 Thread Martin Pieuchot
On 13/09/16(Tue) 12:23, Martin Pieuchot wrote:
> Here's the big scary diff I've been using for some months now to stop
> grabbing the KERNEL_LOCK() in bpf_mtap(9).  This was originally
> written to prevent a lock ordering problem inside pf_test().  Now that
> we're heading toward using a rwlock, we won't have this problem, but
> fewer usages of KERNEL_LOCK() are still interesting.
> 
> I'm going to split this diff in small chunks to ease the review.  But
> I'd appreciate if people could try to break it, test & report back.
> 
> Some notes:
> 
>   - Now that selwakeup() is called in a thread context (task) we only
> rely on the KERNEL_LOCK() to serialize access to kqueue(9) data.
> 
>   - The reference counting is here to make sure a descriptor is not
> freed during a sleep.  That's why the KERNEL_LOCK() is still
> necessary in the slow path.  On the other hand bpf_catchpacket()
> relies on the reference guaranteed by the SRP list.
> 
>   - A mutex now protects the rotating buffers and their associated
> fields.  It is dropped before calling ifpromisc() because USB
> devices sleep.
> 
>   - The dance around uiomove(9) is here to check that buffers aren't
> rotated while data is copied to userland.  Setting ``b->bd_fbuf''
> to NULL should be enough to let bpf_catchpacket() drop the
> packet.  But I added ``__in_uiomove'' to be able to have a usable
> panic if something weird happens.
> 
> Comments?

I've got many test reports but no actual review...  anyone?
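
As for the SRP note above, the fast path amounts to iterating the
per-interface descriptor list under an SRP reference; a sketch (names
as in net/bpf.c, but treat the exact srp.h API shown here as an
assumption):

    struct bpf_d *d;
    struct srp_ref sr;

    SRPL_FOREACH(d, &sr, &bp->bif_dlist, bd_next) {
            /* d stays alive here thanks to the SRP reference;
             * bpf_catchpacket() runs without the KERNEL_LOCK */
    }
    SRPL_LEAVE(&sr);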

Here's another extracted diff to move forward.  Let bpf_allocbufs()
fail when it cannot allocate memory; this way we can call it while
holding a mutex.
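
The point of returning an error is that M_NOWAIT allocations fail
instead of sleeping.  A sketch of the calling pattern this permits;
the mutex and the helper are hypothetical, the diff below still runs
at splnet():

    int
    bpf_prepare_locked(struct bpf_d *d)     /* hypothetical helper */
    {
            int error = 0;

            mtx_enter(&d->bd_mtx);          /* hypothetical mutex */
            if (d->bd_sbuf == NULL)
                    error = bpf_allocbufs(d);
            if (error == 0)
                    bpf_reset_d(d);
            mtx_leave(&d->bd_mtx);
            return (error);
    }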

ok?

Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.150
diff -u -p -r1.150 bpf.c
--- net/bpf.c   16 Oct 2016 18:05:41 -0000  1.150
+++ net/bpf.c   14 Nov 2016 09:02:51 -0000
@@ -92,7 +92,7 @@ int bpf_maxbufsize = BPF_MAXBUFSIZE;
 struct bpf_if  *bpf_iflist;
 LIST_HEAD(, bpf_d) bpf_d_list;
 
-void   bpf_allocbufs(struct bpf_d *);
+int    bpf_allocbufs(struct bpf_d *);
 void   bpf_ifname(struct ifnet *, struct ifreq *);
int    _bpf_mtap(caddr_t, const struct mbuf *, u_int,
void (*)(const void *, void *, size_t));
@@ -996,6 +996,7 @@ int
 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
 {
struct bpf_if *bp, *candidate = NULL;
+   int error = 0;
int s;
 
/*
@@ -1012,30 +1013,33 @@ bpf_setif(struct bpf_d *d, struct ifreq 
candidate = bp;
}
 
-   if (candidate != NULL) {
-   /*
-* Allocate the packet buffers if we need to.
-* If we're already attached to requested interface,
-* just flush the buffer.
-*/
-   if (d->bd_sbuf == NULL)
-   bpf_allocbufs(d);
-   s = splnet();
-   if (candidate != d->bd_bif) {
-   if (d->bd_bif)
-   /*
-* Detach if attached to something else.
-*/
-   bpf_detachd(d);
+   /* Not found. */
+   if (candidate == NULL)
+   return (ENXIO);
 
-   bpf_attachd(d, candidate);
-   }
-   bpf_reset_d(d);
-   splx(s);
-   return (0);
+   /*
+* Allocate the packet buffers if we need to.
+* If we're already attached to requested interface,
+* just flush the buffer.
+*/
+   s = splnet();
+   if (d->bd_sbuf == NULL) {
+   if ((error = bpf_allocbufs(d)))
+   goto out;
}
-   /* Not found. */
-   return (ENXIO);
+   if (candidate != d->bd_bif) {
+   if (d->bd_bif)
+   /*
+* Detach if attached to something else.
+*/
+   bpf_detachd(d);
+
+   bpf_attachd(d, candidate);
+   }
+   bpf_reset_d(d);
+out:
+   splx(s);
+   return (error);
 }
 
 /*
@@ -1434,13 +1438,23 @@ bpf_catchpacket(struct bpf_d *d, u_char 
 /*
  * Initialize all nonzero fields of a descriptor.
  */
-void
+int
 bpf_allocbufs(struct bpf_d *d)
 {
-   d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
-   d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK);
+   d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
+   if (d->bd_fbuf == NULL)
+   return (ENOMEM);
+
+   d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
+   if (d->bd_sbuf == NULL) {
+   free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
+   return (ENOMEM);
+   }
+
d->bd_slen = 0;
d->bd_hlen = 0;
+
+   return (0);
 }
 
 void



Re: use per cpu counters for udp statistics

2016-11-14 Thread Martin Pieuchot
On 14/11/16(Mon) 14:29, David Gwynne wrote:
> its as close to the ipstat change as i can make it.
> 
> ok?
 
[...]

> @@ -142,6 +143,7 @@ void  udp_notify(struct inpcb *, int);
>  void
>  udp_init(void)
>  {
> + udpcounters = counters_alloc(udps_ncounters, M_COUNTERS);

No need to pass M_COUNTERS to counters_alloc(), it's repetitive :)

Other than that ok mpi@
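
For reference, the per-CPU counter pattern the UDP stats move to,
sketched under the assumption of the single-argument counters_alloc()
suggested above; the wrapper macro mirrors what ipstat_inc() does:

    #include <sys/percpu.h>

    struct cpumem *udpcounters;

    /* Bump one statistic on the local CPU; no global lock needed. */
    #define udpstat_inc(c)  counters_inc(udpcounters, (c))

    void
    udp_init(void)
    {
            udpcounters = counters_alloc(udps_ncounters);
            /* ... */
    }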