On 13/09/16(Tue) 12:23, Martin Pieuchot wrote:
> Here's the big scary diff I've been using for some months now to stop
> grabbing the KERNEL_LOCK() in bpf_mtap(9).  This has been originally
> written to prevent lock ordering inside pf_test().  Now that we're
> heading toward using a rwlock, we won't have this problem, but fewer
> usages of KERNEL_LOCK() is still interesting.
> 
> I'm going to split this diff in small chunks to ease the review.  But
> I'd appreciate if people could try to break it, test & report back.
> 
> Some notes:
> 
>   - Now that selwakeup() is called in a thread context (task) we only
>     rely on the KERNEL_LOCK() to serialize access to kqueue(9) data.
> 
>   - The reference counting is here to make sure a descriptor is not
>     freed during a sleep.  That's why the KERNEL_LOCK() is still
>     necessary in the slow path.  On the other hand bpf_catchpacket()
>     relies on the reference guaranteed by the SRP list.
> 
>   - A mutex now protects the rotating buffers and their associated
>     fields.  It is dropped before calling ifpromisc() because USB
>     devices sleep.
> 
>   - The dance around uiomove(9) is here to check that buffers aren't
>     rotated while data is copied to userland.  Setting ``b->bd_fbuf''
>     to NULL should be enough to let bpf_catchpacket() to drop the
>     patcket.  But I added ``__in_uiomove'' to be able to have usable
>     panic if something weird happen.

Here's another extracted diff:  Use goto in read & write and always
increment the reference count in write. 

ok?

Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.151
diff -u -p -r1.151 bpf.c
--- net/bpf.c   16 Nov 2016 09:00:01 -0000      1.151
+++ net/bpf.c   16 Nov 2016 11:08:25 -0000
@@ -406,16 +406,17 @@ bpfread(dev_t dev, struct uio *uio, int 
        if (d->bd_bif == NULL)
                return (ENXIO);
 
+       s = splnet();
+       bpf_get(d);
+
        /*
         * Restrict application to use a buffer the same size as
         * as kernel buffers.
         */
-       if (uio->uio_resid != d->bd_bufsize)
-               return (EINVAL);
-
-       s = splnet();
-
-       bpf_get(d);
+       if (uio->uio_resid != d->bd_bufsize) {
+               error = EINVAL;
+               goto out;
+       }
 
        /*
         * If there's a timeout, bd_rdStart is tagged when we start the read.
@@ -431,13 +432,12 @@ bpfread(dev_t dev, struct uio *uio, int 
         * ends when the timeout expires or when enough packets
         * have arrived to fill the store buffer.
         */
-       while (d->bd_hbuf == 0) {
+       while (d->bd_hbuf == NULL) {
                if (d->bd_bif == NULL) {
                        /* interface is gone */
                        if (d->bd_slen == 0) {
-                               bpf_put(d);
-                               splx(s);
-                               return (EIO);
+                               error = EIO;
+                               goto out;
                        }
                        ROTATE_BUFFERS(d);
                        break;
@@ -461,18 +461,15 @@ bpfread(dev_t dev, struct uio *uio, int 
                        } else
                                error = EWOULDBLOCK;
                }
-               if (error == EINTR || error == ERESTART) {
-                       bpf_put(d);
-                       splx(s);
-                       return (error);
-               }
+               if (error == EINTR || error == ERESTART)
+                       goto out;
                if (error == EWOULDBLOCK) {
                        /*
                         * On a timeout, return what's in the buffer,
                         * which may be nothing.  If there is something
                         * in the store buffer, we can rotate the buffers.
                         */
-                       if (d->bd_hbuf)
+                       if (d->bd_hbuf != NULL)
                                /*
                                 * We filled up the buffer in between
                                 * getting the timeout and arriving
@@ -481,9 +478,8 @@ bpfread(dev_t dev, struct uio *uio, int 
                                break;
 
                        if (d->bd_slen == 0) {
-                               bpf_put(d);
-                               splx(s);
-                               return (0);
+                               error = 0;
+                               goto out;
                        }
                        ROTATE_BUFFERS(d);
                        break;
@@ -505,7 +501,7 @@ bpfread(dev_t dev, struct uio *uio, int 
        d->bd_fbuf = d->bd_hbuf;
        d->bd_hbuf = NULL;
        d->bd_hlen = 0;
-
+out:
        bpf_put(d);
        splx(s);
 
@@ -554,32 +550,40 @@ bpfwrite(dev_t dev, struct uio *uio, int
        struct bpf_insn *fcode = NULL;
        int error, s;
        struct sockaddr_storage dst;
+       u_int dlt;
 
        d = bpfilter_lookup(minor(dev));
        if (d->bd_bif == NULL)
                return (ENXIO);
 
+       bpf_get(d);
        ifp = d->bd_bif->bif_ifp;
 
-       if ((ifp->if_flags & IFF_UP) == 0)
-               return (ENETDOWN);
+       if ((ifp->if_flags & IFF_UP) == 0) {
+               error = ENETDOWN;
+               goto out;
+       }
 
-       if (uio->uio_resid == 0)
-               return (0);
+       if (uio->uio_resid == 0) {
+               error = 0;
+               goto out;
+       }
 
        KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */
        bf = srp_get_locked(&d->bd_wfilter);
        if (bf != NULL)
                fcode = bf->bf_insns;
 
-       error = bpf_movein(uio, d->bd_bif->bif_dlt, &m,
-           (struct sockaddr *)&dst, fcode);
+       dlt = d->bd_bif->bif_dlt;
+
+       error = bpf_movein(uio, dlt, &m, (struct sockaddr *)&dst, fcode);
        if (error)
-               return (error);
+               goto out;
 
        if (m->m_pkthdr.len > ifp->if_mtu) {
                m_freem(m);
-               return (EMSGSIZE);
+               error = EMSGSIZE;
+               goto out;
        }
 
        m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
@@ -591,9 +595,9 @@ bpfwrite(dev_t dev, struct uio *uio, int
        s = splsoftnet();
        error = ifp->if_output(ifp, m, (struct sockaddr *)&dst, NULL);
        splx(s);
-       /*
-        * The driver frees the mbuf.
-        */
+
+out:
+       bpf_put(d);
        return (error);
 }
 

Reply via email to