On 13/09/16(Tue) 12:23, Martin Pieuchot wrote:
> Here's the big scary diff I've been using for some months now to stop
> grabbing the KERNEL_LOCK() in bpf_mtap(9). This has been originally
> written to prevent lock ordering inside pf_test(). Now that we're
> heading toward using a rwlock, we won't have this problem, but fewer
> usages of KERNEL_LOCK() is still interesting.
>
> I'm going to split this diff in small chunks to ease the review. But
> I'd appreciate if people could try to break it, test & report back.
>
> Some notes:
>
> - Now that selwakeup() is called in a thread context (task) we only
> rely on the KERNEL_LOCK() to serialize access to kqueue(9) data.
>
> - The reference counting is here to make sure a descriptor is not
> freed during a sleep. That's why the KERNEL_LOCK() is still
> necessary in the slow path. On the other hand bpf_catchpacket()
> relies on the reference guaranteed by the SRP list.
>
> - A mutex now protects the rotating buffers and their associated
> fields. It is dropped before calling ifpromisc() because USB
> devices sleep.
>
> - The dance around uiomove(9) is here to check that buffers aren't
> rotated while data is copied to userland. Setting ``b->bd_fbuf''
> to NULL should be enough to let bpf_catchpacket() to drop the
> patcket. But I added ``__in_uiomove'' to be able to have usable
> panic if something weird happen.
Here's another extracted diff: Use goto in read & write and always
increment the reference count in write.
ok?
Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.151
diff -u -p -r1.151 bpf.c
--- net/bpf.c 16 Nov 2016 09:00:01 -0000 1.151
+++ net/bpf.c 16 Nov 2016 11:08:25 -0000
@@ -406,16 +406,17 @@ bpfread(dev_t dev, struct uio *uio, int
if (d->bd_bif == NULL)
return (ENXIO);
+ s = splnet();
+ bpf_get(d);
+
/*
* Restrict application to use a buffer the same size as
* as kernel buffers.
*/
- if (uio->uio_resid != d->bd_bufsize)
- return (EINVAL);
-
- s = splnet();
-
- bpf_get(d);
+ if (uio->uio_resid != d->bd_bufsize) {
+ error = EINVAL;
+ goto out;
+ }
/*
* If there's a timeout, bd_rdStart is tagged when we start the read.
@@ -431,13 +432,12 @@ bpfread(dev_t dev, struct uio *uio, int
* ends when the timeout expires or when enough packets
* have arrived to fill the store buffer.
*/
- while (d->bd_hbuf == 0) {
+ while (d->bd_hbuf == NULL) {
if (d->bd_bif == NULL) {
/* interface is gone */
if (d->bd_slen == 0) {
- bpf_put(d);
- splx(s);
- return (EIO);
+ error = EIO;
+ goto out;
}
ROTATE_BUFFERS(d);
break;
@@ -461,18 +461,15 @@ bpfread(dev_t dev, struct uio *uio, int
} else
error = EWOULDBLOCK;
}
- if (error == EINTR || error == ERESTART) {
- bpf_put(d);
- splx(s);
- return (error);
- }
+ if (error == EINTR || error == ERESTART)
+ goto out;
if (error == EWOULDBLOCK) {
/*
* On a timeout, return what's in the buffer,
* which may be nothing. If there is something
* in the store buffer, we can rotate the buffers.
*/
- if (d->bd_hbuf)
+ if (d->bd_hbuf != NULL)
/*
* We filled up the buffer in between
* getting the timeout and arriving
@@ -481,9 +478,8 @@ bpfread(dev_t dev, struct uio *uio, int
break;
if (d->bd_slen == 0) {
- bpf_put(d);
- splx(s);
- return (0);
+ error = 0;
+ goto out;
}
ROTATE_BUFFERS(d);
break;
@@ -505,7 +501,7 @@ bpfread(dev_t dev, struct uio *uio, int
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
d->bd_hlen = 0;
-
+out:
bpf_put(d);
splx(s);
@@ -554,32 +550,40 @@ bpfwrite(dev_t dev, struct uio *uio, int
struct bpf_insn *fcode = NULL;
int error, s;
struct sockaddr_storage dst;
+ u_int dlt;
d = bpfilter_lookup(minor(dev));
if (d->bd_bif == NULL)
return (ENXIO);
+ bpf_get(d);
ifp = d->bd_bif->bif_ifp;
- if ((ifp->if_flags & IFF_UP) == 0)
- return (ENETDOWN);
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ error = ENETDOWN;
+ goto out;
+ }
- if (uio->uio_resid == 0)
- return (0);
+ if (uio->uio_resid == 0) {
+ error = 0;
+ goto out;
+ }
KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
bf = srp_get_locked(&d->bd_wfilter);
if (bf != NULL)
fcode = bf->bf_insns;
- error = bpf_movein(uio, d->bd_bif->bif_dlt, &m,
- (struct sockaddr *)&dst, fcode);
+ dlt = d->bd_bif->bif_dlt;
+
+ error = bpf_movein(uio, dlt, &m, (struct sockaddr *)&dst, fcode);
if (error)
- return (error);
+ goto out;
if (m->m_pkthdr.len > ifp->if_mtu) {
m_freem(m);
- return (EMSGSIZE);
+ error = EMSGSIZE;
+ goto out;
}
m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
@@ -591,9 +595,9 @@ bpfwrite(dev_t dev, struct uio *uio, int
s = splsoftnet();
error = ifp->if_output(ifp, m, (struct sockaddr *)&dst, NULL);
splx(s);
- /*
- * The driver frees the mbuf.
- */
+
+out:
+ bpf_put(d);
return (error);
}