On 13/09/16(Tue) 12:23, Martin Pieuchot wrote: > Here's the big scary diff I've been using for some months now to stop > grabbing the KERNEL_LOCK() in bpf_mtap(9). This has been originally > written to prevent lock ordering inside pf_test(). Now that we're > heading toward using a rwlock, we won't have this problem, but fewer > usages of KERNEL_LOCK() is still interesting. > > I'm going to split this diff in small chunks to ease the review. But > I'd appreciate if people could try to break it, test & report back. > > Some notes: > > - Now that selwakeup() is called in a thread context (task) we only > rely on the KERNEL_LOCK() to serialize access to kqueue(9) data. > > - The reference counting is here to make sure a descriptor is not > freed during a sleep. That's why the KERNEL_LOCK() is still > necessary in the slow path. On the other hand bpf_catchpacket() > relies on the reference guaranteed by the SRP list. > > - A mutex now protects the rotating buffers and their associated > fields. It is dropped before calling ifpromisc() because USB > devices sleep. > > - The dance around uiomove(9) is here to check that buffers aren't > rotated while data is copied to userland. Setting ``b->bd_fbuf'' > to NULL should be enough to let bpf_catchpacket() drop the > packet. But I added ``__in_uiomove'' to be able to have a usable > panic if something weird happens.
Here's another extracted diff: Use goto in read & write and always increment the reference count in write. ok? Index: net/bpf.c =================================================================== RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.151 diff -u -p -r1.151 bpf.c --- net/bpf.c 16 Nov 2016 09:00:01 -0000 1.151 +++ net/bpf.c 16 Nov 2016 11:08:25 -0000 @@ -406,16 +406,17 @@ bpfread(dev_t dev, struct uio *uio, int if (d->bd_bif == NULL) return (ENXIO); + s = splnet(); + bpf_get(d); + /* * Restrict application to use a buffer the same size as * as kernel buffers. */ - if (uio->uio_resid != d->bd_bufsize) - return (EINVAL); - - s = splnet(); - - bpf_get(d); + if (uio->uio_resid != d->bd_bufsize) { + error = EINVAL; + goto out; + } /* * If there's a timeout, bd_rdStart is tagged when we start the read. @@ -431,13 +432,12 @@ bpfread(dev_t dev, struct uio *uio, int * ends when the timeout expires or when enough packets * have arrived to fill the store buffer. */ - while (d->bd_hbuf == 0) { + while (d->bd_hbuf == NULL) { if (d->bd_bif == NULL) { /* interface is gone */ if (d->bd_slen == 0) { - bpf_put(d); - splx(s); - return (EIO); + error = EIO; + goto out; } ROTATE_BUFFERS(d); break; @@ -461,18 +461,15 @@ bpfread(dev_t dev, struct uio *uio, int } else error = EWOULDBLOCK; } - if (error == EINTR || error == ERESTART) { - bpf_put(d); - splx(s); - return (error); - } + if (error == EINTR || error == ERESTART) + goto out; if (error == EWOULDBLOCK) { /* * On a timeout, return what's in the buffer, * which may be nothing. If there is something * in the store buffer, we can rotate the buffers. 
*/ - if (d->bd_hbuf) + if (d->bd_hbuf != NULL) /* * We filled up the buffer in between * getting the timeout and arriving @@ -481,9 +478,8 @@ bpfread(dev_t dev, struct uio *uio, int break; if (d->bd_slen == 0) { - bpf_put(d); - splx(s); - return (0); + error = 0; + goto out; } ROTATE_BUFFERS(d); break; @@ -505,7 +501,7 @@ bpfread(dev_t dev, struct uio *uio, int d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; - +out: bpf_put(d); splx(s); @@ -554,32 +550,40 @@ bpfwrite(dev_t dev, struct uio *uio, int struct bpf_insn *fcode = NULL; int error, s; struct sockaddr_storage dst; + u_int dlt; d = bpfilter_lookup(minor(dev)); if (d->bd_bif == NULL) return (ENXIO); + bpf_get(d); ifp = d->bd_bif->bif_ifp; - if ((ifp->if_flags & IFF_UP) == 0) - return (ENETDOWN); + if ((ifp->if_flags & IFF_UP) == 0) { + error = ENETDOWN; + goto out; + } - if (uio->uio_resid == 0) - return (0); + if (uio->uio_resid == 0) { + error = 0; + goto out; + } KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */ bf = srp_get_locked(&d->bd_wfilter); if (bf != NULL) fcode = bf->bf_insns; - error = bpf_movein(uio, d->bd_bif->bif_dlt, &m, - (struct sockaddr *)&dst, fcode); + dlt = d->bd_bif->bif_dlt; + + error = bpf_movein(uio, dlt, &m, (struct sockaddr *)&dst, fcode); if (error) - return (error); + goto out; if (m->m_pkthdr.len > ifp->if_mtu) { m_freem(m); - return (EMSGSIZE); + error = EMSGSIZE; + goto out; } m->m_pkthdr.ph_rtableid = ifp->if_rdomain; @@ -591,9 +595,9 @@ bpfwrite(dev_t dev, struct uio *uio, int s = splsoftnet(); error = ifp->if_output(ifp, m, (struct sockaddr *)&dst, NULL); splx(s); - /* - * The driver frees the mbuf. - */ + +out: + bpf_put(d); return (error); }