On Oct 12, 2012, at 8:54 AM, Guy Helmer <guy.hel...@gmail.com> wrote:

> 
> On Oct 10, 2012, at 1:37 PM, Alexander V. Chernikov <melif...@freebsd.org> 
> wrote:
> 
>> On 10.10.2012 00:36, Guy Helmer wrote:
>>> 
>>> On Oct 8, 2012, at 8:09 AM, Guy Helmer <guy.hel...@gmail.com> wrote:
>>> 
>>>> I'm seeing a consistent new kernel panic in FreeBSD 8.3:
>>>> I'm not seeing how bd_sbuf would be NULL here. Any ideas?
>>> 
>>> Since I've not had any replies, I hope nobody minds if I reply with more 
>>> information.
>>> 
>>> This panic seems to be occasionally triggered now that my user land code is 
> changing the packet filter a while after the bpf device has been opened and 
>>> an initial packet filter was set (previously, my code did not change the 
>>> filter after it was initially set).
>>> 
>>> I'm focusing on bpf_setf() since that seems to be the place that could be 
>>> tickling a problem, and I see that bpf_setf() calls reset_d(d) to clear the 
>>> hold buffer. I have manually verified that the BPFD lock is held during the 
>>> call to reset_d(), and the lock is held every other place that the buffers 
>>> are manipulated, so I haven't been able to find any place that seems 
>>> vulnerable to losing one of the bpf buffers. Still searching, but any help 
>>> would be appreciated.
>> 
>> Can you please check this code on -current?
>> Locking has changed quite significantly some time ago, so there is good 
>> chance that you can get rid of this panic (or discover different one which 
>> is really "new") :).
> 
> I'm not ready to run this app on current, so I have merged revs 229898, 
> 233937, 233938, 233946, 235744, 235745, 235746, 235747, 236231, 236251, 
> 236261, 236262, 236559, and 236806 to my 8.3 checkout to get code that should 
> be virtually identical to current without the timestamp changes.
> 
> Unfortunately, I have only been able to trigger the panic in my test lab once 
> -- so I'm not sure whether a lack of problems with the updated code will be 
> indicative of likely success in the field where this has been triggered 
> regularly at some sites…
> 
> Thanks,
> Guy
> 


FWIW, I was able to trigger the panic with the original 8.3 code again in my 
test lab. With these changes resulting from merging the revs mentioned above, I 
have not seen any panics in my test lab setup in two days of load testing, and 
AFAIK, packet capturing seems to be working fine.

I've included the diffs for reference for anyone encountering the issue.

Thanks, Alexander!

Guy

Index: net/bpf.c
===================================================================
--- net/bpf.c   (revision 239830)
+++ net/bpf.c   (working copy)
@@ -43,6 +43,8 @@
 
 #include <sys/types.h>
 #include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
@@ -66,6 +68,7 @@
 #include <sys/socket.h>
 
 #include <net/if.h>
+#define        BPF_INTERNAL
 #include <net/bpf.h>
 #include <net/bpf_buffer.h>
 #ifdef BPF_JITTER
@@ -139,6 +142,7 @@
 
 static void    bpf_attachd(struct bpf_d *, struct bpf_if *);
 static void    bpf_detachd(struct bpf_d *);
+static void    bpf_detachd_locked(struct bpf_d *);
 static void    bpf_freed(struct bpf_d *);
 static int     bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
                    struct sockaddr *, int *, struct bpf_insn *);
@@ -150,7 +154,7 @@
                    void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
                    struct timeval *);
 static void    reset_d(struct bpf_d *);
-static int      bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
+static int     bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
 static int     bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
 static int     bpf_setdlt(struct bpf_d *, u_int);
 static void    filt_bpfdetach(struct knote *);
@@ -168,6 +172,12 @@
 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
     bpf_stats_sysctl, "bpf statistics portal");
 
+static VNET_DEFINE(int, bpf_optimize_writers) = 0;
+#define        V_bpf_optimize_writers VNET(bpf_optimize_writers)
+SYSCTL_VNET_INT(_net_bpf, OID_AUTO, optimize_writers,
+    CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0,
+    "Do not send packets until BPF program is set");
+
 static d_open_t        bpfopen;
 static d_read_t        bpfread;
 static d_write_t       bpfwrite;
@@ -189,7 +199,38 @@
 static struct filterops bpfread_filtops =
        { 1, NULL, filt_bpfdetach, filt_bpfread };
 
+eventhandler_tag       bpf_ifdetach_cookie = NULL;
+
 /*
+ * LOCKING MODEL USED BY BPF:
+ * Locks:
+ * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
+ * some global counters and every bpf_if reference.
+ * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
+ * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
+ *   used by bpf_mtap code.
+ *
+ * Lock order:
+ *
+ * Global lock, interface lock, descriptor lock
+ *
+ * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
+ * working model. In many places (like bpf_detachd) we start with BPF descriptor
+ * (and we need to at least rlock it to get reliable interface pointer). This
+ * gives us potential LOR. As a result, we use global lock to protect from bpf_if
+ * change in every such place.
+ *
+ * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
+ * 3) descriptor main wlock.
+ * Reading bd_bif can be protected by any of these locks, typically global lock.
+ *
+ * Changing read/write BPF filter is protected by the same three locks,
+ * the same applies for reading.
+ *
+ * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ */
+
+/*
  * Wrapper functions for various buffering methods.  If the set of buffer
  * modes expands, we will probably want to introduce a switch data structure
  * similar to protosw, et.
@@ -282,7 +323,6 @@
 static int
 bpf_canwritebuf(struct bpf_d *d)
 {
-
        BPFD_LOCK_ASSERT(d);
 
        switch (d->bd_bufmode) {
@@ -561,18 +601,93 @@
 static void
 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
 {
+       int op_w;
+
+       BPF_LOCK_ASSERT();
+
        /*
-        * Point d at bp, and add d to the interface's list of listeners.
-        * Finally, point the driver's bpf cookie at the interface so
-        * it will divert packets to bpf.
+        * Save sysctl value to protect from sysctl change
+        * between reads
         */
-       BPFIF_LOCK(bp);
+       op_w = V_bpf_optimize_writers;
+
+       if (d->bd_bif != NULL)
+               bpf_detachd_locked(d);
+       /*
+        * Point d at bp, and add d to the interface's list.
+        * Since there are many applications using BPF for
+        * sending raw packets only (dhcpd, cdpd are good examples)
+        * we can delay adding d to the list of active listeners until
+        * some filter is configured.
+        */
+
+       BPFIF_WLOCK(bp);
+       BPFD_LOCK(d);
+
        d->bd_bif = bp;
-       LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
 
+       if (op_w != 0) {
+               /* Add to writers-only list */
+               LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+               /*
+                * We decrement bd_writer on every filter set operation.
+                * First BIOCSETF is done by pcap_open_live() to set up
+                * snap length. After that application usually sets its own filter
+                */
+               d->bd_writer = 2;
+       } else
+               LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+
+       BPFD_UNLOCK(d);
+       BPFIF_WUNLOCK(bp);
+
        bpf_bpfd_cnt++;
-       BPFIF_UNLOCK(bp);
 
+       CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
+           __func__, d->bd_pid, d->bd_writer ? "writer" : "active");
+
+       if (op_w == 0)
+               EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
+}
+
+/*
+ * Add d to the list of active bp filters.
+ * Requires bpf_attachd() to be called first
+ */
+static void
+bpf_upgraded(struct bpf_d *d)
+{
+       struct bpf_if *bp;
+
+       BPF_LOCK_ASSERT();
+
+       bp = d->bd_bif;
+
+       /*
+        * Filter can be set several times without specifying interface.
+        * Mark d as reader and exit.
+        */
+       if (bp == NULL) {
+               BPFD_LOCK(d);
+               d->bd_writer = 0;
+               BPFD_UNLOCK(d);
+               return;
+       }
+
+       BPFIF_WLOCK(bp);
+       BPFD_LOCK(d);
+
+       /* Remove from writers-only list */
+       LIST_REMOVE(d, bd_next);
+       LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+       /* Mark d as reader */
+       d->bd_writer = 0;
+
+       BPFD_UNLOCK(d);
+       BPFIF_WUNLOCK(bp);
+
+       CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
+
        EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
 }
 
@@ -582,27 +697,48 @@
 static void
 bpf_detachd(struct bpf_d *d)
 {
+       BPF_LOCK();
+       bpf_detachd_locked(d);
+       BPF_UNLOCK();
+}
+
+static void
+bpf_detachd_locked(struct bpf_d *d)
+{
        int error;
        struct bpf_if *bp;
        struct ifnet *ifp;
 
-       bp = d->bd_bif;
-       BPFIF_LOCK(bp);
+       CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
+
+       BPF_LOCK_ASSERT();
+
+       /* Check if descriptor is attached */
+       if ((bp = d->bd_bif) == NULL)
+               return;
+
+       BPFIF_WLOCK(bp);
        BPFD_LOCK(d);
-       ifp = d->bd_bif->bif_ifp;
 
+       /* Save bd_writer value */
+       error = d->bd_writer;
+
        /*
         * Remove d from the interface's descriptor list.
         */
        LIST_REMOVE(d, bd_next);
 
-       bpf_bpfd_cnt--;
+       ifp = bp->bif_ifp;
        d->bd_bif = NULL;
        BPFD_UNLOCK(d);
-       BPFIF_UNLOCK(bp);
+       BPFIF_WUNLOCK(bp);
 
-       EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+       bpf_bpfd_cnt--;
 
+       /* Call event handler iff d is attached */
+       if (error == 0)
+               EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
+
        /*
         * Check if this descriptor had requested promiscuous mode.
         * If so, turn it off.
@@ -640,10 +776,7 @@
        d->bd_state = BPF_IDLE;
        BPFD_UNLOCK(d);
        funsetown(&d->bd_sigio);
-       mtx_lock(&bpf_mtx);
-       if (d->bd_bif)
-               bpf_detachd(d);
-       mtx_unlock(&bpf_mtx);
+       bpf_detachd(d);
 #ifdef MAC
        mac_bpfdesc_destroy(d);
 #endif /* MAC */
@@ -663,7 +796,7 @@
 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
        struct bpf_d *d;
-       int error;
+       int error, size;
 
        d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
        error = devfs_set_cdevpriv(d, bpf_dtor);
@@ -681,15 +814,19 @@
        d->bd_bufmode = BPF_BUFMODE_BUFFER;
        d->bd_sig = SIGIO;
        d->bd_direction = BPF_D_INOUT;
-       d->bd_pid = td->td_proc->p_pid;
+       BPF_PID_REFRESH(d, td);
 #ifdef MAC
        mac_bpfdesc_init(d);
        mac_bpfdesc_create(td->td_ucred, d);
 #endif
-       mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
-       callout_init_mtx(&d->bd_callout, &d->bd_mtx, 0);
-       knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
+       mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
+       callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
+       knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
 
+       /* Allocate default buffers */
+       size = d->bd_bufsize;
+       bpf_buffer_ioctl_sblen(d, &size);
+
        return (0);
 }
 
@@ -718,7 +855,7 @@
        non_block = ((ioflag & O_NONBLOCK) != 0);
 
        BPFD_LOCK(d);
-       d->bd_pid = curthread->td_proc->p_pid;
+       BPF_PID_REFRESH_CUR(d);
        if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
                BPFD_UNLOCK(d);
                return (EOPNOTSUPP);
@@ -764,7 +901,7 @@
                        BPFD_UNLOCK(d);
                        return (EWOULDBLOCK);
                }
-               error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
+               error = msleep(d, &d->bd_lock, PRINET|PCATCH,
                     "bpf", d->bd_rtout);
                if (error == EINTR || error == ERESTART) {
                        BPFD_UNLOCK(d);
@@ -881,8 +1018,9 @@
        if (error != 0)
                return (error);
 
-       d->bd_pid = curthread->td_proc->p_pid;
+       BPF_PID_REFRESH_CUR(d);
        d->bd_wcount++;
+       /* XXX: locking required */
        if (d->bd_bif == NULL) {
                d->bd_wdcount++;
                return (ENXIO);
@@ -903,6 +1041,7 @@
        bzero(&dst, sizeof(dst));
        m = NULL;
        hlen = 0;
+       /* XXX: bpf_movein() can sleep */
        error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
            &m, &dst, &hlen, d->bd_wfilter);
        if (error) {
@@ -962,7 +1101,7 @@
 reset_d(struct bpf_d *d)
 {
 
-       mtx_assert(&d->bd_mtx, MA_OWNED);
+       BPFD_LOCK_ASSERT(d);
 
        if ((d->bd_hbuf != NULL) &&
            (d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
@@ -1028,7 +1167,7 @@
         * Refresh PID associated with this descriptor.
         */
        BPFD_LOCK(d);
-       d->bd_pid = td->td_proc->p_pid;
+       BPF_PID_REFRESH(d, td);
        if (d->bd_state == BPF_WAITING)
                callout_stop(&d->bd_callout);
        d->bd_state = BPF_IDLE;
@@ -1079,7 +1218,9 @@
        case BIOCGDLTLIST32:
        case BIOCGRTIMEOUT32:
        case BIOCSRTIMEOUT32:
+               BPFD_LOCK(d);
                d->bd_compat32 = 1;
+               BPFD_UNLOCK(d);
        }
 #endif
 
@@ -1124,7 +1265,9 @@
         * Get buffer len [for read()].
         */
        case BIOCGBLEN:
+               BPFD_LOCK(d);
                *(u_int *)addr = d->bd_bufsize;
+               BPFD_UNLOCK(d);
                break;
 
        /*
@@ -1179,10 +1322,12 @@
         * Get current data link type.
         */
        case BIOCGDLT:
+               BPF_LOCK();
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else
                        *(u_int *)addr = d->bd_bif->bif_dlt;
+               BPF_UNLOCK();
                break;
 
        /*
@@ -1197,6 +1342,7 @@
                        list32 = (struct bpf_dltlist32 *)addr;
                        dltlist.bfl_len = list32->bfl_len;
                        dltlist.bfl_list = PTRIN(list32->bfl_list);
+                       BPF_LOCK();
                        if (d->bd_bif == NULL)
                                error = EINVAL;
                        else {
@@ -1204,31 +1350,37 @@
                                if (error == 0)
                                        list32->bfl_len = dltlist.bfl_len;
                        }
+                       BPF_UNLOCK();
                        break;
                }
 #endif
 
        case BIOCGDLTLIST:
+               BPF_LOCK();
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else
                        error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
+               BPF_UNLOCK();
                break;
 
        /*
         * Set data link type.
         */
        case BIOCSDLT:
+               BPF_LOCK();
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else
                        error = bpf_setdlt(d, *(u_int *)addr);
+               BPF_UNLOCK();
                break;
 
        /*
         * Get interface name.
         */
        case BIOCGETIF:
+               BPF_LOCK();
                if (d->bd_bif == NULL)
                        error = EINVAL;
                else {
@@ -1238,13 +1390,16 @@
                        strlcpy(ifr->ifr_name, ifp->if_xname,
                            sizeof(ifr->ifr_name));
                }
+               BPF_UNLOCK();
                break;
 
        /*
         * Set interface.
         */
        case BIOCSETIF:
+               BPF_LOCK();
                error = bpf_setif(d, (struct ifreq *)addr);
+               BPF_UNLOCK();
                break;
 
        /*
@@ -1327,7 +1482,9 @@
         * Set immediate mode.
         */
        case BIOCIMMEDIATE:
+               BPFD_LOCK(d);
                d->bd_immediate = *(u_int *)addr;
+               BPFD_UNLOCK(d);
                break;
 
        case BIOCVERSION:
@@ -1343,21 +1500,27 @@
         * Get "header already complete" flag
         */
        case BIOCGHDRCMPLT:
+               BPFD_LOCK(d);
                *(u_int *)addr = d->bd_hdrcmplt;
+               BPFD_UNLOCK(d);
                break;
 
        /*
         * Set "header already complete" flag
         */
        case BIOCSHDRCMPLT:
+               BPFD_LOCK(d);
                d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
+               BPFD_UNLOCK(d);
                break;
 
        /*
         * Get packet direction flag
         */
        case BIOCGDIRECTION:
+               BPFD_LOCK(d);
                *(u_int *)addr = d->bd_direction;
+               BPFD_UNLOCK(d);
                break;
 
        /*
@@ -1372,7 +1535,9 @@
                        case BPF_D_IN:
                        case BPF_D_INOUT:
                        case BPF_D_OUT:
+                               BPFD_LOCK(d);
                                d->bd_direction = direction;
+                               BPFD_UNLOCK(d);
                                break;
                        default:
                                error = EINVAL;
@@ -1381,26 +1546,38 @@
                break;
 
        case BIOCFEEDBACK:
+               BPFD_LOCK(d);
                d->bd_feedback = *(u_int *)addr;
+               BPFD_UNLOCK(d);
                break;
 
        case BIOCLOCK:
+               BPFD_LOCK(d);
                d->bd_locked = 1;
+               BPFD_UNLOCK(d);
                break;
 
        case FIONBIO:           /* Non-blocking I/O */
                break;
 
        case FIOASYNC:          /* Send signal on receive packets */
+               BPFD_LOCK(d);
                d->bd_async = *(int *)addr;
+               BPFD_UNLOCK(d);
                break;
 
        case FIOSETOWN:
+               /*
+                * XXX: Add some sort of locking here?
+                * fsetown() can sleep.
+                */
                error = fsetown(*(int *)addr, &d->bd_sigio);
                break;
 
        case FIOGETOWN:
+               BPFD_LOCK(d);
                *(int *)addr = fgetown(&d->bd_sigio);
+               BPFD_UNLOCK(d);
                break;
 
        /* This is deprecated, FIOSETOWN should be used instead. */
@@ -1421,16 +1598,23 @@
 
                        if (sig >= NSIG)
                                error = EINVAL;
-                       else
+                       else {
+                               BPFD_LOCK(d);
                                d->bd_sig = sig;
+                               BPFD_UNLOCK(d);
+                       }
                        break;
                }
        case BIOCGRSIG:
+               BPFD_LOCK(d);
                *(u_int *)addr = d->bd_sig;
+               BPFD_UNLOCK(d);
                break;
 
        case BIOCGETBUFMODE:
+               BPFD_LOCK(d);
                *(u_int *)addr = d->bd_bufmode;
+               BPFD_UNLOCK(d);
                break;
 
        case BIOCSETBUFMODE:
@@ -1485,95 +1669,130 @@
 /*
  * Set d's packet filter program to fp.  If this file already has a filter,
  * free it and replace it.  Returns EINVAL for bogus requests.
+ *
+ * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
+ * since reading d->bd_bif can't be protected by d or interface lock due to
+ * lock order.
+ *
+ * Additionally, we have to acquire the interface write lock because bpf_mtap() uses
+ * the interface read lock to read all filters.
+ *
  */
 static int
 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
 {
+#ifdef COMPAT_FREEBSD32
+       struct bpf_program fp_swab;
+       struct bpf_program32 *fp32;
+#endif
        struct bpf_insn *fcode, *old;
-       u_int wfilter, flen, size;
 #ifdef BPF_JITTER
-       bpf_jit_filter *ofunc;
+       bpf_jit_filter *jfunc, *ofunc;
 #endif
+       size_t size;
+       u_int flen;
+       int need_upgrade;
+
 #ifdef COMPAT_FREEBSD32
-       struct bpf_program32 *fp32;
-       struct bpf_program fp_swab;
-
-       if (cmd == BIOCSETWF32 || cmd == BIOCSETF32 || cmd == BIOCSETFNR32) {
+       switch (cmd) {
+       case BIOCSETF32:
+       case BIOCSETWF32:
+       case BIOCSETFNR32:
                fp32 = (struct bpf_program32 *)fp;
                fp_swab.bf_len = fp32->bf_len;
                fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
                fp = &fp_swab;
-               if (cmd == BIOCSETWF32)
+               switch (cmd) {
+               case BIOCSETF32:
+                       cmd = BIOCSETF;
+                       break;
+               case BIOCSETWF32:
                        cmd = BIOCSETWF;
+                       break;
+               }
+               break;
        }
 #endif
-       if (cmd == BIOCSETWF) {
-               old = d->bd_wfilter;
-               wfilter = 1;
+
+       fcode = NULL;
 #ifdef BPF_JITTER
-               ofunc = NULL;
+       jfunc = ofunc = NULL;
 #endif
-       } else {
-               wfilter = 0;
-               old = d->bd_rfilter;
-#ifdef BPF_JITTER
-               ofunc = d->bd_bfilter;
-#endif
-       }
-       if (fp->bf_insns == NULL) {
-               if (fp->bf_len != 0)
+       need_upgrade = 0;
+
+       /*
+        * Check new filter validness before acquiring any locks.
+        * Allocate memory for new filter, if needed.
+        */
+       flen = fp->bf_len;
+       if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
+               return (EINVAL);
+       size = flen * sizeof(*fp->bf_insns);
+       if (size > 0) {
+               /* We're setting up new filter.  Copy and check actual data. */
+               fcode = malloc(size, M_BPF, M_WAITOK);
+               if (copyin(fp->bf_insns, fcode, size) != 0 ||
+                   !bpf_validate(fcode, flen)) {
+                       free(fcode, M_BPF);
                        return (EINVAL);
-               BPFD_LOCK(d);
-               if (wfilter)
-                       d->bd_wfilter = NULL;
-               else {
-                       d->bd_rfilter = NULL;
-#ifdef BPF_JITTER
-                       d->bd_bfilter = NULL;
-#endif
-                       if (cmd == BIOCSETF)
-                               reset_d(d);
                }
-               BPFD_UNLOCK(d);
-               if (old != NULL)
-                       free((caddr_t)old, M_BPF);
 #ifdef BPF_JITTER
-               if (ofunc != NULL)
-                       bpf_destroy_jit_filter(ofunc);
+               /* Filter is copied inside fcode and is perfectly valid. */
+               jfunc = bpf_jitter(fcode, flen);
 #endif
-               return (0);
        }
-       flen = fp->bf_len;
-       if (flen > bpf_maxinsns)
-               return (EINVAL);
 
-       size = flen * sizeof(*fp->bf_insns);
-       fcode = (struct bpf_insn *)malloc(size, M_BPF, M_WAITOK);
-       if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 &&
-           bpf_validate(fcode, (int)flen)) {
-               BPFD_LOCK(d);
-               if (wfilter)
-                       d->bd_wfilter = fcode;
-               else {
-                       d->bd_rfilter = fcode;
+       BPF_LOCK();
+
+       /*
+        * Set up new filter.
+        * Protect filter change by interface lock.
+        * Additionally, we are protected by global lock here.
+        */
+       if (d->bd_bif != NULL)
+               BPFIF_WLOCK(d->bd_bif);
+       BPFD_LOCK(d);
+       if (cmd == BIOCSETWF) {
+               old = d->bd_wfilter;
+               d->bd_wfilter = fcode;
+       } else {
+               old = d->bd_rfilter;
+               d->bd_rfilter = fcode;
 #ifdef BPF_JITTER
-                       d->bd_bfilter = bpf_jitter(fcode, flen);
+               ofunc = d->bd_bfilter;
+               d->bd_bfilter = jfunc;
 #endif
-                       if (cmd == BIOCSETF)
-                               reset_d(d);
+               if (cmd == BIOCSETF)
+                       reset_d(d);
+
+               if (fcode != NULL) {
+                       /*
+                        * Do not require upgrade by first BIOCSETF
+                        * (used to set snaplen) by pcap_open_live().
+                        */
+                       if (d->bd_writer != 0 && --d->bd_writer == 0)
+                               need_upgrade = 1;
+                       CTR4(KTR_NET, "%s: filter function set by pid %d, "
+                           "bd_writer counter %d, need_upgrade %d",
+                           __func__, d->bd_pid, d->bd_writer, need_upgrade);
                }
-               BPFD_UNLOCK(d);
-               if (old != NULL)
-                       free((caddr_t)old, M_BPF);
+       }
+       BPFD_UNLOCK(d);
+       if (d->bd_bif != NULL)
+               BPFIF_WUNLOCK(d->bd_bif);
+       if (old != NULL)
+               free(old, M_BPF);
 #ifdef BPF_JITTER
-               if (ofunc != NULL)
-                       bpf_destroy_jit_filter(ofunc);
+       if (ofunc != NULL)
+               bpf_destroy_jit_filter(ofunc);
 #endif
 
-               return (0);
-       }
-       free((caddr_t)fcode, M_BPF);
-       return (EINVAL);
+       /* Move d to active readers list. */
+       if (need_upgrade)
+               bpf_upgraded(d);
+
+       BPF_UNLOCK();
+       return (0);
 }
 
 /*
@@ -1587,28 +1806,30 @@
        struct bpf_if *bp;
        struct ifnet *theywant;
 
+       BPF_LOCK_ASSERT();
+
        theywant = ifunit(ifr->ifr_name);
        if (theywant == NULL || theywant->if_bpf == NULL)
                return (ENXIO);
 
        bp = theywant->if_bpf;
 
+       /* Check if interface is not being detached from BPF */
+       BPFIF_RLOCK(bp);
+       if (bp->flags & BPFIF_FLAG_DYING) {
+               BPFIF_RUNLOCK(bp);
+               return (ENXIO);
+       }
+       BPFIF_RUNLOCK(bp);
+
        /*
         * Behavior here depends on the buffering model.  If we're using
         * kernel memory buffers, then we can allocate them here.  If we're
         * using zero-copy, then the user process must have registered
         * buffers by the time we get here.  If not, return an error.
-        *
-        * XXXRW: There are locking issues here with multi-threaded use: what
-        * if two threads try to set the interface at once?
         */
        switch (d->bd_bufmode) {
        case BPF_BUFMODE_BUFFER:
-               if (d->bd_sbuf == NULL)
-                       bpf_buffer_alloc(d);
-               KASSERT(d->bd_sbuf != NULL, ("bpf_setif: bd_sbuf NULL"));
-               break;
-
        case BPF_BUFMODE_ZBUF:
                if (d->bd_sbuf == NULL)
                        return (EINVAL);
@@ -1617,15 +1838,8 @@
        default:
                panic("bpf_setif: bufmode %d", d->bd_bufmode);
        }
-       if (bp != d->bd_bif) {
-               if (d->bd_bif)
-                       /*
-                        * Detach if attached to something else.
-                        */
-                       bpf_detachd(d);
-
+       if (bp != d->bd_bif)
                bpf_attachd(d, bp);
-       }
        BPFD_LOCK(d);
        reset_d(d);
        BPFD_UNLOCK(d);
@@ -1653,7 +1867,7 @@
         */
        revents = events & (POLLOUT | POLLWRNORM);
        BPFD_LOCK(d);
-       d->bd_pid = td->td_proc->p_pid;
+       BPF_PID_REFRESH(d, td);
        if (events & (POLLIN | POLLRDNORM)) {
                if (bpf_ready(d))
                        revents |= events & (POLLIN | POLLRDNORM);
@@ -1688,7 +1902,7 @@
         * Refresh PID associated with this descriptor.
         */
        BPFD_LOCK(d);
-       d->bd_pid = curthread->td_proc->p_pid;
+       BPF_PID_REFRESH_CUR(d);
        kn->kn_fop = &bpfread_filtops;
        kn->kn_hook = d;
        knlist_add(&d->bd_sel.si_note, kn, 1);
@@ -1744,9 +1958,19 @@
        struct timeval tv;
 
        gottime = 0;
-       BPFIF_LOCK(bp);
+
+       BPFIF_RLOCK(bp);
+
        LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
-               BPFD_LOCK(d);
+               /*
+                * We are not using any locks for d here because:
+                * 1) any filter change is protected by interface
+                * write lock
+                * 2) destroying/detaching d is protected by interface
+                * write lock, too
+                */
+
+               /* XXX: Do not protect counter for the sake of performance. */
                ++d->bd_rcount;
                /*
                 * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
@@ -1762,6 +1986,11 @@
 #endif
                slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
                if (slen != 0) {
+                       /*
+                        * Filter matches. Let's acquire the write lock.
+                        */
+                       BPFD_LOCK(d);
+
                        d->bd_fcount++;
                        if (!gottime) {
                                microtime(&tv);
@@ -1772,10 +2001,10 @@
 #endif
                                catchpacket(d, pkt, pktlen, slen,
                                    bpf_append_bytes, &tv);
+                       BPFD_UNLOCK(d);
                }
-               BPFD_UNLOCK(d);
        }
-       BPFIF_UNLOCK(bp);
+       BPFIF_RUNLOCK(bp);
 }
 
 #define        BPF_CHECK_DIRECTION(d, r, i)                            \
@@ -1784,6 +2013,7 @@
 
 /*
  * Incoming linkage from device drivers, when packet is in an mbuf chain.
+ * Locking model is explained in bpf_tap().
  */
 void
 bpf_mtap(struct bpf_if *bp, struct mbuf *m)
@@ -1806,11 +2036,11 @@
 
        pktlen = m_length(m, NULL);
 
-       BPFIF_LOCK(bp);
+       BPFIF_RLOCK(bp);
+
        LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
                if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
                        continue;
-               BPFD_LOCK(d);
                ++d->bd_rcount;
 #ifdef BPF_JITTER
                bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -1821,6 +2051,8 @@
 #endif
                slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
                if (slen != 0) {
+                       BPFD_LOCK(d);
+
                        d->bd_fcount++;
                        if (!gottime) {
                                microtime(&tv);
@@ -1831,10 +2063,10 @@
 #endif
                                catchpacket(d, (u_char *)m, pktlen, slen,
                                    bpf_append_mbuf, &tv);
+                       BPFD_UNLOCK(d);
                }
-               BPFD_UNLOCK(d);
        }
-       BPFIF_UNLOCK(bp);
+       BPFIF_RUNLOCK(bp);
 }
 
 /*
@@ -1869,14 +2101,17 @@
        mb.m_len = dlen;
        pktlen += dlen;
 
-       BPFIF_LOCK(bp);
+
+       BPFIF_RLOCK(bp);
+
        LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
                if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
                        continue;
-               BPFD_LOCK(d);
                ++d->bd_rcount;
                slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
                if (slen != 0) {
+                       BPFD_LOCK(d);
+
                        d->bd_fcount++;
                        if (!gottime) {
                                microtime(&tv);
@@ -1887,10 +2122,10 @@
 #endif
                                catchpacket(d, (u_char *)&mb, pktlen, slen,
                                    bpf_append_mbuf, &tv);
+                       BPFD_UNLOCK(d);
                }
-               BPFD_UNLOCK(d);
        }
-       BPFIF_UNLOCK(bp);
+       BPFIF_RUNLOCK(bp);
 }
 
 #undef BPF_CHECK_DIRECTION
@@ -2040,7 +2275,7 @@
        }
        if (d->bd_wfilter != NULL)
                free((caddr_t)d->bd_wfilter, M_BPF);
-       mtx_destroy(&d->bd_mtx);
+       mtx_destroy(&d->bd_lock);
 }
 
 /*
@@ -2070,15 +2305,16 @@
                panic("bpfattach");
 
        LIST_INIT(&bp->bif_dlist);
+       LIST_INIT(&bp->bif_wlist);
        bp->bif_ifp = ifp;
        bp->bif_dlt = dlt;
-       mtx_init(&bp->bif_mtx, "bpf interface lock", NULL, MTX_DEF);
+       rw_init(&bp->bif_lock, "bpf interface lock");
        KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
        *driverp = bp;
 
-       mtx_lock(&bpf_mtx);
+       BPF_LOCK();
        LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
-       mtx_unlock(&bpf_mtx);
+       BPF_UNLOCK();
 
        /*
         * Compute the length of the bpf header.  This is not necessarily
@@ -2093,10 +2329,9 @@
 }
 
 /*
- * Detach bpf from an interface.  This involves detaching each descriptor
- * associated with the interface, and leaving bd_bif NULL.  Notify each
- * descriptor as it's detached so that any sleepers wake up and get
- * ENXIO.
+ * Detach bpf from an interface. This involves detaching each descriptor
+ * associated with the interface. Notify each descriptor as it's detached
+ * so that any sleepers wake up and get ENXIO.
  */
 void
 bpfdetach(struct ifnet *ifp)
@@ -2109,31 +2344,45 @@
        ndetached = 0;
 #endif
 
+       BPF_LOCK();
        /* Find all bpf_if struct's which reference ifp and detach them. */
        do {
-               mtx_lock(&bpf_mtx);
                LIST_FOREACH(bp, &bpf_iflist, bif_next) {
                        if (ifp == bp->bif_ifp)
                                break;
                }
                if (bp != NULL)
                        LIST_REMOVE(bp, bif_next);
-               mtx_unlock(&bpf_mtx);
 
                if (bp != NULL) {
 #ifdef INVARIANTS
                        ndetached++;
 #endif
                        while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
-                               bpf_detachd(d);
+                               bpf_detachd_locked(d);
                                BPFD_LOCK(d);
                                bpf_wakeup(d);
                                BPFD_UNLOCK(d);
                        }
-                       mtx_destroy(&bp->bif_mtx);
-                       free(bp, M_BPF);
+                       /* Free writer-only descriptors */
+                       while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
+                               bpf_detachd_locked(d);
+                               BPFD_LOCK(d);
+                               bpf_wakeup(d);
+                               BPFD_UNLOCK(d);
+                       }
+
+                       /*
+                        * Delay freeing bp till interface is detached
+                        * and all routes through this interface are removed.
+                        * Mark bp as detached to restrict new consumers.
+                        */
+                       BPFIF_WLOCK(bp);
+                       bp->flags |= BPFIF_FLAG_DYING;
+                       BPFIF_WUNLOCK(bp);
                }
        } while (bp != NULL);
+       BPF_UNLOCK();
 
 #ifdef INVARIANTS
        if (ndetached == 0)
@@ -2142,6 +2391,37 @@
 }
 
 /*
+ * Interface departure handler.
+ * Note departure event does not guarantee interface is going down.
+ */
+static void
+bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+       struct bpf_if *bp;
+
+       BPF_LOCK();
+       if ((bp = ifp->if_bpf) == NULL) {
+               BPF_UNLOCK();
+               return;
+       }
+
+       /* Check if bpfdetach() was called previously */
+       if ((bp->flags & BPFIF_FLAG_DYING) == 0) {
+               BPF_UNLOCK();
+               return;
+       }
+
+       CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
+           __func__, bp, ifp);
+
+       ifp->if_bpf = NULL;
+       BPF_UNLOCK();
+
+       rw_destroy(&bp->bif_lock);
+       free(bp, M_BPF);
+}
+
+/*
  * Get a list of available data link type of the interface.
  */
 static int
@@ -2151,24 +2431,22 @@
        struct ifnet *ifp;
        struct bpf_if *bp;
 
+       BPF_LOCK_ASSERT();
+
        ifp = d->bd_bif->bif_ifp;
        n = 0;
        error = 0;
-       mtx_lock(&bpf_mtx);
        LIST_FOREACH(bp, &bpf_iflist, bif_next) {
                if (bp->bif_ifp != ifp)
                        continue;
                if (bfl->bfl_list != NULL) {
-                       if (n >= bfl->bfl_len) {
-                               mtx_unlock(&bpf_mtx);
+                       if (n >= bfl->bfl_len)
                                return (ENOMEM);
-                       }
                        error = copyout(&bp->bif_dlt,
                            bfl->bfl_list + n, sizeof(u_int));
                }
                n++;
        }
-       mtx_unlock(&bpf_mtx);
        bfl->bfl_len = n;
        return (error);
 }
@@ -2183,18 +2461,19 @@
        struct ifnet *ifp;
        struct bpf_if *bp;
 
+       BPF_LOCK_ASSERT();
+
        if (d->bd_bif->bif_dlt == dlt)
                return (0);
        ifp = d->bd_bif->bif_ifp;
-       mtx_lock(&bpf_mtx);
+
        LIST_FOREACH(bp, &bpf_iflist, bif_next) {
                if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
                        break;
        }
-       mtx_unlock(&bpf_mtx);
+
        if (bp != NULL) {
                opromisc = d->bd_promisc;
-               bpf_detachd(d);
                bpf_attachd(d, bp);
                BPFD_LOCK(d);
                reset_d(d);
@@ -2223,6 +2502,11 @@
        dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
        /* For compatibility */
        make_dev_alias(dev, "bpf0");
+
+       /* Register interface departure handler */
+       bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
+                   ifnet_departure_event, bpf_ifdetach, NULL,
+                   EVENTHANDLER_PRI_ANY);
 }
 
 /*
@@ -2236,9 +2520,9 @@
        struct bpf_if *bp;
        struct bpf_d *bd;
 
-       mtx_lock(&bpf_mtx);
+       BPF_LOCK();
        LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-               BPFIF_LOCK(bp);
+               BPFIF_RLOCK(bp);
                LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
                        BPFD_LOCK(bd);
                        bd->bd_rcount = 0;
@@ -2249,11 +2533,14 @@
                        bd->bd_zcopy = 0;
                        BPFD_UNLOCK(bd);
                }
-               BPFIF_UNLOCK(bp);
+               BPFIF_RUNLOCK(bp);
        }
-       mtx_unlock(&bpf_mtx);
+       BPF_UNLOCK();
 }
 
+/*
+ * Fill filter statistics
+ */
 static void
 bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
 {
@@ -2261,6 +2548,7 @@
        bzero(d, sizeof(*d));
        BPFD_LOCK_ASSERT(bd);
        d->bd_structsize = sizeof(*d);
+       /* XXX: reading should be protected by global lock */
        d->bd_immediate = bd->bd_immediate;
        d->bd_promisc = bd->bd_promisc;
        d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -2285,6 +2573,9 @@
        d->bd_bufmode = bd->bd_bufmode;
 }
 
+/*
+ * Handle `netstat -B' stats request
+ */
 static int
 bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
 {
@@ -2322,24 +2613,31 @@
        if (bpf_bpfd_cnt == 0)
                return (SYSCTL_OUT(req, 0, 0));
        xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
-       mtx_lock(&bpf_mtx);
+       BPF_LOCK();
        if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
-               mtx_unlock(&bpf_mtx);
+               BPF_UNLOCK();
                free(xbdbuf, M_BPF);
                return (ENOMEM);
        }
        index = 0;
        LIST_FOREACH(bp, &bpf_iflist, bif_next) {
-               BPFIF_LOCK(bp);
+               BPFIF_RLOCK(bp);
+               /* Send writers-only first */
+               LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+                       xbd = &xbdbuf[index++];
+                       BPFD_LOCK(bd);
+                       bpfstats_fill_xbpf(xbd, bd);
+                       BPFD_UNLOCK(bd);
+               }
                LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
                        xbd = &xbdbuf[index++];
                        BPFD_LOCK(bd);
                        bpfstats_fill_xbpf(xbd, bd);
                        BPFD_UNLOCK(bd);
                }
-               BPFIF_UNLOCK(bp);
+               BPFIF_RUNLOCK(bp);
        }
-       mtx_unlock(&bpf_mtx);
+       BPF_UNLOCK();
        error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
        free(xbdbuf, M_BPF);
        return (error);
Index: net/bpf.h
===================================================================
--- net/bpf.h   (revision 239830)
+++ net/bpf.h   (working copy)
@@ -917,14 +917,21 @@
 
 /*
  * Descriptor associated with each attached hardware interface.
+ * FIXME: this structure is exposed to external callers to speed up the
+ * bpf_peers_present() call. However, all fields not needed by that
+ * function are hidden behind the BPF_INTERNAL define.
  */
 struct bpf_if {
        LIST_ENTRY(bpf_if)      bif_next;       /* list of all interfaces */
        LIST_HEAD(, bpf_d)      bif_dlist;      /* descriptor list */
+#ifdef BPF_INTERNAL
        u_int bif_dlt;                          /* link layer type */
        u_int bif_hdrlen;               /* length of header (with padding) */
        struct ifnet *bif_ifp;          /* corresponding interface */
-       struct mtx      bif_mtx;        /* mutex for interface */
+       struct rwlock bif_lock;         /* interface lock */
+       LIST_HEAD(, bpf_d)      bif_wlist;      /* writer-only list */
+       int flags;                      /* Interface flags */
+#endif
 };
 
 void    bpf_bufheld(struct bpf_d *d);
Index: net/bpf_buffer.c
===================================================================
--- net/bpf_buffer.c    (revision 239830)
+++ net/bpf_buffer.c    (working copy)
@@ -93,21 +93,6 @@
 SYSCTL_INT(_net_bpf, OID_AUTO, maxbufsize, CTLFLAG_RW,
     &bpf_maxbufsize, 0, "Default capture buffer in bytes");
 
-void
-bpf_buffer_alloc(struct bpf_d *d)
-{
-
-       KASSERT(d->bd_fbuf == NULL, ("bpf_buffer_alloc: bd_fbuf != NULL"));
-       KASSERT(d->bd_sbuf == NULL, ("bpf_buffer_alloc: bd_sbuf != NULL"));
-       KASSERT(d->bd_hbuf == NULL, ("bpf_buffer_alloc: bd_hbuf != NULL"));
-
-       d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-       d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_BPF, M_WAITOK);
-       d->bd_hbuf = NULL;
-       d->bd_slen = 0;
-       d->bd_hlen = 0;
-}
-
 /*
  * Simple data copy to the current kernel buffer.
  */
@@ -183,18 +168,42 @@
 bpf_buffer_ioctl_sblen(struct bpf_d *d, u_int *i)
 {
        u_int size;
+       caddr_t fbuf, sbuf;
 
+       size = *i;
+       if (size > bpf_maxbufsize)
+               *i = size = bpf_maxbufsize;
+       else if (size < BPF_MINBUFSIZE)
+               *i = size = BPF_MINBUFSIZE;
+
+       /* Allocate buffers immediately */
+       fbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+       sbuf = (caddr_t)malloc(size, M_BPF, M_WAITOK);
+
        BPFD_LOCK(d);
        if (d->bd_bif != NULL) {
+               /* Interface already attached, unable to change buffers */
                BPFD_UNLOCK(d);
+               free(fbuf, M_BPF);
+               free(sbuf, M_BPF);
                return (EINVAL);
        }
-       size = *i;
-       if (size > bpf_maxbufsize)
-               *i = size = bpf_maxbufsize;
-       else if (size < BPF_MINBUFSIZE)
-               *i = size = BPF_MINBUFSIZE;
+
+       /* Free old buffers if set */
+       if (d->bd_fbuf != NULL)
+               free(d->bd_fbuf, M_BPF);
+       if (d->bd_sbuf != NULL)
+               free(d->bd_sbuf, M_BPF);
+
+       /* Fill in new data */
        d->bd_bufsize = size;
+       d->bd_fbuf = fbuf;
+       d->bd_sbuf = sbuf;
+
+       d->bd_hbuf = NULL;
+       d->bd_slen = 0;
+       d->bd_hlen = 0;
+
        BPFD_UNLOCK(d);
        return (0);
 }
Index: net/bpf_buffer.h
===================================================================
--- net/bpf_buffer.h    (revision 239830)
+++ net/bpf_buffer.h    (working copy)
@@ -36,7 +36,6 @@
 #error "no user-serviceable parts inside"
 #endif
 
-void   bpf_buffer_alloc(struct bpf_d *d);
 void   bpf_buffer_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
            void *src, u_int len);
 void   bpf_buffer_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
Index: net/bpfdesc.h
===================================================================
--- net/bpfdesc.h       (revision 239830)
+++ net/bpfdesc.h       (working copy)
@@ -79,6 +79,7 @@
        u_char          bd_promisc;     /* true if listening promiscuously */
        u_char          bd_state;       /* idle, waiting, or timed out */
        u_char          bd_immediate;   /* true to return on packet arrival */
+       u_char          bd_writer;      /* non-zero if d is writer-only */
        int             bd_hdrcmplt;    /* false to fill in src lladdr 
automatically */
        int             bd_direction;   /* select packet direction */
        int             bd_feedback;    /* true to feed back sent packets */
@@ -86,7 +87,7 @@
        int             bd_sig;         /* signal to send upon packet reception 
*/
        struct sigio *  bd_sigio;       /* information for async I/O */
        struct selinfo  bd_sel;         /* bsd select info */
-       struct mtx      bd_mtx;         /* mutex for this descriptor */
+       struct mtx      bd_lock;        /* per-descriptor lock */
        struct callout  bd_callout;     /* for BPF timeouts with select */
        struct label    *bd_label;      /* MAC label for descriptor */
        u_int64_t       bd_fcount;      /* number of packets which matched 
filter */
@@ -105,10 +106,16 @@
 #define BPF_WAITING    1               /* waiting for read timeout in select */
 #define BPF_TIMED_OUT  2               /* read timeout has expired in select */
 
-#define BPFD_LOCK(bd)          mtx_lock(&(bd)->bd_mtx)
-#define BPFD_UNLOCK(bd)                mtx_unlock(&(bd)->bd_mtx)
-#define BPFD_LOCK_ASSERT(bd)   mtx_assert(&(bd)->bd_mtx, MA_OWNED)
+#define BPFD_LOCK(bd)          mtx_lock(&(bd)->bd_lock)
+#define BPFD_UNLOCK(bd)                mtx_unlock(&(bd)->bd_lock)
+#define BPFD_LOCK_ASSERT(bd)   mtx_assert(&(bd)->bd_lock, MA_OWNED)
 
+#define BPF_PID_REFRESH(bd, td)        (bd)->bd_pid = (td)->td_proc->p_pid
+#define BPF_PID_REFRESH_CUR(bd)        (bd)->bd_pid = curthread->td_proc->p_pid
+
+#define BPF_LOCK()             mtx_lock(&bpf_mtx)
+#define BPF_UNLOCK()           mtx_unlock(&bpf_mtx)
+#define BPF_LOCK_ASSERT()      mtx_assert(&bpf_mtx, MA_OWNED)
 /*
  * External representation of the bpf descriptor
  */
@@ -143,7 +150,11 @@
        u_int64_t       bd_spare[4];
 };
 
-#define BPFIF_LOCK(bif)                mtx_lock(&(bif)->bif_mtx)
-#define BPFIF_UNLOCK(bif)      mtx_unlock(&(bif)->bif_mtx)
+#define BPFIF_RLOCK(bif)       rw_rlock(&(bif)->bif_lock)
+#define BPFIF_RUNLOCK(bif)     rw_runlock(&(bif)->bif_lock)
+#define BPFIF_WLOCK(bif)       rw_wlock(&(bif)->bif_lock)
+#define BPFIF_WUNLOCK(bif)     rw_wunlock(&(bif)->bif_lock)
 
+#define BPFIF_FLAG_DYING       1       /* Reject new bpf consumers */
+
 #endif

_______________________________________________
freebsd-stable@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Reply via email to