Re: bpf_mtap(9) w/o KERNEL_LOCK
On Mon, Dec 12, 2016 at 03:22:36PM +0100, Martin Pieuchot wrote: > @@ -56,15 +56,17 @@ struct bpf_d { ... > - struct bpf_if * bd_bif; /* interface descriptor */ > + int __in_uiomove; > + > + struct bpf_if *bd_bif; /* interface descriptor */ If __in_uiomove is a debug trick, write that in a comment. As far as I know, the __ prefix means it is a compiler extension. So prefix the field with bd_, and the comment tells us we can remove it in a year. otherwise OK bluhm@
Re: bpf_mtap(9) w/o KERNEL_LOCK
On 10/12/16(Sat) 23:01, Jonathan Matthew wrote: > On Mon, Nov 28, 2016 at 04:01:14PM +0100, Martin Pieuchot wrote: > > Last diff to trade the KERNEL_LOCK for a mutex in order to protect data > > accessed inside bpf_catchpacket(). > > > > Note about the multiples data structures: > > > > - selwakeup() is called in a thread context (task) so we rely on the > > KERNEL_LOCK() to serialize access to kqueue(9) data. > > > > - the global list of descriptor ``bpf_d_list``, accessed via > > bpfilter_lookup() is also protected by the KERNEL_LOCK(). > > > > - bpf_get() increments the refcount of a descriptor. It is needed > > to increment it around ifppromisc() which can sleep. > > > > - A mutex now protects the rotating buffers and their associated > > fields. > > > > - The dance around uiomove(9) is here to check that buffers aren't > > rotated while data is copied to userland. Setting ``b->bd_fbuf'' > > to NULL should be enough to let bpf_catchpacket() to drop the > > patcket. But I added ``__in_uiomove'' to be able to have usable > > panic if something weird happen. > > > > Comments, oks? > > The thing I'm concerned about here is the call to bpf_wakeup() from > bpf_catchpacket() - this modifies the bpf_d refcount outside the kernel lock. > Elsewhere the refcount is modified while holding the kernel lock, with or > without the mutex. It'll never free the bpf_d there, since the bpf_d list > still holds a reference, but the refcount operations aren't atomic, so the > counter could get messed up. Is there a reason not to use atomics there? Good point. Here's an updated diff that uses the atomics and addresses Alexander's comments. Index: net/bpf.c === RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.155 diff -u -p -r1.155 bpf.c --- net/bpf.c 28 Nov 2016 10:16:08 - 1.155 +++ net/bpf.c 12 Dec 2016 14:18:38 - @@ -116,6 +116,9 @@ int bpf_sysctl_locked(int *, u_int, void struct bpf_d *bpfilter_lookup(int); +/* + * Called holding ``bd_mtx''. 
+ */ void bpf_attachd(struct bpf_d *, struct bpf_if *); void bpf_detachd(struct bpf_d *); void bpf_resetd(struct bpf_d *); @@ -260,11 +263,12 @@ bpf_movein(struct uio *uio, u_int linkty /* * Attach file to the bpf interface, i.e. make d listen on bp. - * Must be called at splnet. */ void bpf_attachd(struct bpf_d *d, struct bpf_if *bp) { + MUTEX_ASSERT_LOCKED(>bd_mtx); + /* * Point d at bp, and add d to the interface's list of listeners. * Finally, point the driver's bpf cookie at the interface so @@ -287,6 +291,8 @@ bpf_detachd(struct bpf_d *d) { struct bpf_if *bp; + MUTEX_ASSERT_LOCKED(>bd_mtx); + bp = d->bd_bif; /* Not attached. */ if (bp == NULL) @@ -313,7 +319,13 @@ bpf_detachd(struct bpf_d *d) int error; d->bd_promisc = 0; + + bpf_get(d); + mtx_leave(>bd_mtx); error = ifpromisc(bp->bif_ifp, 0); + mtx_enter(>bd_mtx); + bpf_put(d); + if (error && !(error == EINVAL || error == ENODEV)) /* * Something is really wrong if we were able to put @@ -353,6 +365,7 @@ bpfopen(dev_t dev, int flag, int mode, s bd->bd_unit = unit; bd->bd_bufsize = bpf_bufsize; bd->bd_sig = SIGIO; + mtx_init(>bd_mtx, IPL_NET); task_set(>bd_wake_task, bpf_wakeup_cb, bd); if (flag & FNONBLOCK) @@ -372,15 +385,14 @@ int bpfclose(dev_t dev, int flag, int mode, struct proc *p) { struct bpf_d *d; - int s; d = bpfilter_lookup(minor(dev)); - s = splnet(); + mtx_enter(>bd_mtx); bpf_detachd(d); bpf_wakeup(d); LIST_REMOVE(d, bd_list); + mtx_leave(>bd_mtx); bpf_put(d); - splx(s); return (0); } @@ -391,11 +403,13 @@ bpfclose(dev_t dev, int flag, int mode, * Zero the length of the new store buffer. 
*/ #define ROTATE_BUFFERS(d) \ + KASSERT(d->__in_uiomove == 0); \ + MUTEX_ASSERT_LOCKED(>bd_mtx); \ (d)->bd_hbuf = (d)->bd_sbuf; \ (d)->bd_hlen = (d)->bd_slen; \ (d)->bd_sbuf = (d)->bd_fbuf; \ (d)->bd_slen = 0; \ - (d)->bd_fbuf = 0; + (d)->bd_fbuf = NULL; /* * bpfread - read next chunk of packets from buffers */ @@ -403,15 +417,17 @@ int bpfread(dev_t dev, struct uio *uio, int ioflag) { struct bpf_d *d; - int error; - int s; + caddr_t hbuf; + int hlen, error; + + KERNEL_ASSERT_LOCKED(); d = bpfilter_lookup(minor(dev)); if (d->bd_bif == NULL) return (ENXIO); - s = splnet(); bpf_get(d); + mtx_enter(>bd_mtx); /* * Restrict application to use a buffer the same size as @@ -460,8 +476,8 @@ bpfread(dev_t dev, struct uio
Re: bpf_mtap(9) w/o KERNEL_LOCK
On 07/12/16(Wed) 00:44, Alexander Bluhm wrote: > On Mon, Nov 28, 2016 at 04:01:14PM +0100, Martin Pieuchot wrote: > > @@ -717,7 +753,9 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t > > *(u_int *)addr = size = bpf_maxbufsize; > > else if (size < BPF_MINBUFSIZE) > > *(u_int *)addr = size = BPF_MINBUFSIZE; > > + mtx_enter(>bd_mtx); > > d->bd_bufsize = size; > > + mtx_leave(>bd_mtx); > > } > > break; > > > > bd_bufsize needs protection, but you only add it in BIOCSBLEN, > you should also put a mutex in BIOCGBLEN. I'm not sure to understand why. The value is only written in ioctl(2) context. The mutex is here to make sure the value wont change while bpf_catchpacket() is executed. > > @@ -778,30 +815,36 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t > > * Get device parameters. > > */ > > case BIOCGDLT: > > + mtx_enter(>bd_mtx); > > if (d->bd_bif == NULL) > > error = EINVAL; > > else > > *(u_int *)addr = d->bd_bif->bif_dlt; > > + mtx_leave(>bd_mtx); > > break; > > bd_bif is not protected elsewhere. bif_dlt is only set during attach. > So I think you need no mutex here. Indeed. > > > > /* > > * Set device parameters. > > */ > > case BIOCSDLT: > > + mtx_enter(>bd_mtx); > > if (d->bd_bif == NULL) > > error = EINVAL; > > else > > error = bpf_setdlt(d, *(u_int *)addr); > > + mtx_leave(>bd_mtx); > > break; > > bd_bif needs not protection, so put the mutex only in the else path. > Then it is obvious why it is there. Ack. > > > > /* > > * Set interface name. > > */ > > case BIOCGETIF: > > + mtx_enter(>bd_mtx); > > if (d->bd_bif == NULL) > > error = EINVAL; > > else > > bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr); > > + mtx_leave(>bd_mtx); > > break; > > The interface name is not used by bpf without kernel lock. You do > not need a mutex here. Ack. 
> > @@ -1151,6 +1197,8 @@ filt_bpfread(struct knote *kn, long hint > > { > > struct bpf_d *d = kn->kn_hook; > > > > + KERNEL_ASSERT_LOCKED(); > > + > > kn->kn_data = d->bd_hlen; > > if (d->bd_immediate) > > kn->kn_data += d->bd_slen; > > You need the mutex here to access bd_slen. Ack. > > @@ -1232,12 +1279,10 @@ _bpf_mtap(caddr_t arg, const struct mbuf > > if (!gottime++) > > microtime(); > > > > - KERNEL_LOCK(); > > - s = splnet(); > > + mtx_enter(>bd_mtx); > > bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn, > > ); > > - splx(s); > > - KERNEL_UNLOCK(); > > + mtx_leave(>bd_mtx); > > > > if (d->bd_fildrop) > > drop = 1; > > bd_fildrop can be changed by ioctl. Can we expect to read the > correct value without a memory barrier? > > Same question applies to bd_dirfilt. We probably need some. I guess that the actual code works because these events happen occasionally. > bd_rcount is a long value incremented atomically in _bpf_mtap(). > It is read from an other CPU with kernel lock during an ioctl. Is > this sufficient to get a consisten value? Do we also need an atomic > operation when reading it? Do we need a consistent value? > > --- net/bpfdesc.h 22 Aug 2016 10:40:36 - 1.31 > > +++ net/bpfdesc.h 28 Nov 2016 14:51:06 - > > @@ -56,15 +56,17 @@ struct bpf_d { > > * fbuf (free) - When read is done, put cluster here. > > * On receiving, if sbuf is full and fbuf is 0, packet is dropped. > > */ > > + struct mutexbd_mtx; /* protect buffer slots below */ > > caddr_t bd_sbuf;/* store slot */ > > caddr_t bd_hbuf;/* hold slot */ > > caddr_t bd_fbuf;/* free slot */ > > int bd_slen;/* current length of store buffer */ > > int bd_hlen;/* current length of hold buffer */ > > - > > int bd_bufsize; /* absolute length of buffers */ > > So bd_mtx is protecting the fields above. Everything below must > either be not accessed without kernel lock, or only be set during > attach. Is that correct or do I have a wrong understanding of the > SMP model? 
I'm not sure I understood your sentence, but I think you got the model right :) > > - struct bpf_if * bd_bif; /* interface descriptor */ > > + int __in_uiomove; > > Why do you prefix it with __ ? bd_uiomove would be more consistent. Because it's a
Re: bpf_mtap(9) w/o KERNEL_LOCK
On Mon, Nov 28, 2016 at 04:01:14PM +0100, Martin Pieuchot wrote: > Last diff to trade the KERNEL_LOCK for a mutex in order to protect data > accessed inside bpf_catchpacket(). > > Note about the multiple data structures: > > - selwakeup() is called in a thread context (task) so we rely on the > KERNEL_LOCK() to serialize access to kqueue(9) data. > > - the global list of descriptor ``bpf_d_list``, accessed via > bpfilter_lookup() is also protected by the KERNEL_LOCK(). > > - bpf_get() increments the refcount of a descriptor. It is needed > to increment it around ifpromisc() which can sleep. > > - A mutex now protects the rotating buffers and their associated > fields. > > - The dance around uiomove(9) is here to check that buffers aren't > rotated while data is copied to userland. Setting ``b->bd_fbuf'' > to NULL should be enough to let bpf_catchpacket() to drop the > packet. But I added ``__in_uiomove'' to be able to have usable > panic if something weird happens. > > Comments, oks? The thing I'm concerned about here is the call to bpf_wakeup() from bpf_catchpacket() - this modifies the bpf_d refcount outside the kernel lock. Elsewhere the refcount is modified while holding the kernel lock, with or without the mutex. It'll never free the bpf_d there, since the bpf_d list still holds a reference, but the refcount operations aren't atomic, so the counter could get messed up. Is there a reason not to use atomics there?
Re: bpf_mtap(9) w/o KERNEL_LOCK
On Mon, Nov 28, 2016 at 04:01:14PM +0100, Martin Pieuchot wrote: > @@ -313,7 +319,13 @@ bpf_detachd(struct bpf_d *d) > int error; > > d->bd_promisc = 0; > + > + bpf_get(d); > + mtx_leave(>bd_mtx); > error = ifpromisc(bp->bif_ifp, 0); > + mtx_enter(>bd_mtx); > + bpf_put(d); > + If I see something like this, I ask myself wether bp has to be protected by the mutex like this: bpf_get(d); ifp = bp->bif_ifp; mtx_leave(>bd_mtx); error = ifpromisc(ifp, 0); mtx_enter(>bd_mtx); bpf_put(d); But I think your code is safe as bd_bif is only modified with kernel lock. > @@ -542,6 +568,7 @@ bpf_wakeup_cb(void *xd) > > selwakeup(>bd_sel); > bpf_put(d); > + > } > > int This new line is wrong. > @@ -717,7 +753,9 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t > *(u_int *)addr = size = bpf_maxbufsize; > else if (size < BPF_MINBUFSIZE) > *(u_int *)addr = size = BPF_MINBUFSIZE; > + mtx_enter(>bd_mtx); > d->bd_bufsize = size; > + mtx_leave(>bd_mtx); > } > break; > bd_bufsize needs protection, but you only add it in BIOCSBLEN, you should also put a mutex in BIOCGBLEN. > @@ -778,30 +815,36 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t >* Get device parameters. >*/ > case BIOCGDLT: > + mtx_enter(>bd_mtx); > if (d->bd_bif == NULL) > error = EINVAL; > else > *(u_int *)addr = d->bd_bif->bif_dlt; > + mtx_leave(>bd_mtx); > break; bd_bif is not protected elsewhere. bif_dlt is only set during attach. So I think you need no mutex here. > > /* >* Set device parameters. >*/ > case BIOCSDLT: > + mtx_enter(>bd_mtx); > if (d->bd_bif == NULL) > error = EINVAL; > else > error = bpf_setdlt(d, *(u_int *)addr); > + mtx_leave(>bd_mtx); > break; bd_bif needs not protection, so put the mutex only in the else path. Then it is obvious why it is there. > > /* >* Set interface name. >*/ > case BIOCGETIF: > + mtx_enter(>bd_mtx); > if (d->bd_bif == NULL) > error = EINVAL; > else > bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr); > + mtx_leave(>bd_mtx); > break; The interface name is not used by bpf without kernel lock. 
You do not need a mutex here. > @@ -1151,6 +1197,8 @@ filt_bpfread(struct knote *kn, long hint > { > struct bpf_d *d = kn->kn_hook; > > + KERNEL_ASSERT_LOCKED(); > + > kn->kn_data = d->bd_hlen; > if (d->bd_immediate) > kn->kn_data += d->bd_slen; You need the mutex here to access bd_slen. > @@ -1232,12 +1279,10 @@ _bpf_mtap(caddr_t arg, const struct mbuf > if (!gottime++) > microtime(); > > - KERNEL_LOCK(); > - s = splnet(); > + mtx_enter(>bd_mtx); > bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn, > ); > - splx(s); > - KERNEL_UNLOCK(); > + mtx_leave(>bd_mtx); > > if (d->bd_fildrop) > drop = 1; bd_fildrop can be changed by ioctl. Can we expect to read the correct value without a memory barrier? Same question applies to bd_dirfilt. bd_rcount is a long value incremented atomically in _bpf_mtap(). It is read from an other CPU with kernel lock during an ioctl. Is this sufficient to get a consisten value? Do we also need an atomic operation when reading it? > --- net/bpfdesc.h 22 Aug 2016 10:40:36 - 1.31 > +++ net/bpfdesc.h 28 Nov 2016 14:51:06 - > @@ -56,15 +56,17 @@ struct bpf_d { >* fbuf (free) - When read is done, put cluster here. >* On receiving, if sbuf is full and fbuf is 0, packet is dropped. >*/ > + struct mutexbd_mtx; /* protect buffer slots below */ > caddr_t bd_sbuf;/* store slot */ > caddr_t bd_hbuf;/* hold slot */ > caddr_t bd_fbuf;/* free slot */ > int bd_slen;/* current length of store buffer */ > int bd_hlen;/* current length of hold buffer */ > - > int bd_bufsize; /* absolute length of buffers */ So bd_mtx is protecting the fields above. Everything below must either be not accessed without kernel lock, or only be
bpf_mtap(9) w/o KERNEL_LOCK
Last diff to trade the KERNEL_LOCK for a mutex in order to protect data accessed inside bpf_catchpacket(). Note about the multiples data structures: - selwakeup() is called in a thread context (task) so we rely on the KERNEL_LOCK() to serialize access to kqueue(9) data. - the global list of descriptor ``bpf_d_list``, accessed via bpfilter_lookup() is also protected by the KERNEL_LOCK(). - bpf_get() increments the refcount of a descriptor. It is needed to increment it around ifppromisc() which can sleep. - A mutex now protects the rotating buffers and their associated fields. - The dance around uiomove(9) is here to check that buffers aren't rotated while data is copied to userland. Setting ``b->bd_fbuf'' to NULL should be enough to let bpf_catchpacket() to drop the patcket. But I added ``__in_uiomove'' to be able to have usable panic if something weird happen. Comments, oks? Index: net/bpf.c === RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.155 diff -u -p -r1.155 bpf.c --- net/bpf.c 28 Nov 2016 10:16:08 - 1.155 +++ net/bpf.c 28 Nov 2016 14:51:06 - @@ -116,6 +116,9 @@ int bpf_sysctl_locked(int *, u_int, void struct bpf_d *bpfilter_lookup(int); +/* + * Called holding ``bd_mtx''. + */ void bpf_attachd(struct bpf_d *, struct bpf_if *); void bpf_detachd(struct bpf_d *); void bpf_resetd(struct bpf_d *); @@ -260,11 +263,12 @@ bpf_movein(struct uio *uio, u_int linkty /* * Attach file to the bpf interface, i.e. make d listen on bp. - * Must be called at splnet. */ void bpf_attachd(struct bpf_d *d, struct bpf_if *bp) { + MUTEX_ASSERT_LOCKED(>bd_mtx); + /* * Point d at bp, and add d to the interface's list of listeners. * Finally, point the driver's bpf cookie at the interface so @@ -287,6 +291,8 @@ bpf_detachd(struct bpf_d *d) { struct bpf_if *bp; + MUTEX_ASSERT_LOCKED(>bd_mtx); + bp = d->bd_bif; /* Not attached. 
*/ if (bp == NULL) @@ -313,7 +319,13 @@ bpf_detachd(struct bpf_d *d) int error; d->bd_promisc = 0; + + bpf_get(d); + mtx_leave(>bd_mtx); error = ifpromisc(bp->bif_ifp, 0); + mtx_enter(>bd_mtx); + bpf_put(d); + if (error && !(error == EINVAL || error == ENODEV)) /* * Something is really wrong if we were able to put @@ -353,6 +365,7 @@ bpfopen(dev_t dev, int flag, int mode, s bd->bd_unit = unit; bd->bd_bufsize = bpf_bufsize; bd->bd_sig = SIGIO; + mtx_init(>bd_mtx, IPL_NET); task_set(>bd_wake_task, bpf_wakeup_cb, bd); if (flag & FNONBLOCK) @@ -372,15 +385,14 @@ int bpfclose(dev_t dev, int flag, int mode, struct proc *p) { struct bpf_d *d; - int s; d = bpfilter_lookup(minor(dev)); - s = splnet(); + mtx_enter(>bd_mtx); bpf_detachd(d); bpf_wakeup(d); LIST_REMOVE(d, bd_list); + mtx_leave(>bd_mtx); bpf_put(d); - splx(s); return (0); } @@ -391,11 +403,13 @@ bpfclose(dev_t dev, int flag, int mode, * Zero the length of the new store buffer. */ #define ROTATE_BUFFERS(d) \ + KASSERT(d->__in_uiomove == 0); \ + MUTEX_ASSERT_LOCKED(>bd_mtx); \ (d)->bd_hbuf = (d)->bd_sbuf; \ (d)->bd_hlen = (d)->bd_slen; \ (d)->bd_sbuf = (d)->bd_fbuf; \ (d)->bd_slen = 0; \ - (d)->bd_fbuf = 0; + (d)->bd_fbuf = NULL; /* * bpfread - read next chunk of packets from buffers */ @@ -403,15 +417,17 @@ int bpfread(dev_t dev, struct uio *uio, int ioflag) { struct bpf_d *d; - int error; - int s; + caddr_t hbuf; + int hlen, error; + + KERNEL_ASSERT_LOCKED(); d = bpfilter_lookup(minor(dev)); if (d->bd_bif == NULL) return (ENXIO); - s = splnet(); bpf_get(d); + mtx_enter(>bd_mtx); /* * Restrict application to use a buffer the same size as @@ -460,8 +476,8 @@ bpfread(dev_t dev, struct uio *uio, int error = EWOULDBLOCK; } else { if ((d->bd_rdStart + d->bd_rtout) < ticks) { - error = tsleep((caddr_t)d, PRINET|PCATCH, "bpf", - d->bd_rtout); + error = msleep(d, >bd_mtx, PRINET|PCATCH, + "bpf", d->bd_rtout); } else error = EWOULDBLOCK; } @@ -492,22 +508,30 @@ bpfread(dev_t dev, struct uio *uio, int /* * At this point, 
we know we have something in the hold slot. */ - splx(s); + hbuf = d->bd_hbuf; + hlen = d->bd_hlen; + d->bd_hbuf =
Re: bpf_mtap(9) w/o KERNEL_LOCK
On Tue, Nov 22, 2016 at 11:54:47AM +0100, Martin Pieuchot wrote: > Next extract diff that tweaks bpf_detachd(). > > The goal here is to remove ``d'' from the list and NULLify ``d->bd_bif'' > before calling ifpromisc(). > > The reason is that ifpromisc() can sleep. Think USB ;) So we'll have to > release the mutex before calling it. So we want to make sure ``d'' is no > longer in the interface descriptor list at this point. > > ok? OK bluhm@ > > Index: net/bpf.c > === > RCS file: /cvs/src/sys/net/bpf.c,v > retrieving revision 1.154 > diff -u -p -r1.154 bpf.c > --- net/bpf.c 21 Nov 2016 09:15:40 - 1.154 > +++ net/bpf.c 22 Nov 2016 10:47:05 - > @@ -288,6 +288,23 @@ bpf_detachd(struct bpf_d *d) > struct bpf_if *bp; > > bp = d->bd_bif; > + /* Not attached. */ > + if (bp == NULL) > + return; > + > + /* Remove ``d'' from the interface's descriptor list. */ > + KERNEL_ASSERT_LOCKED(); > + SRPL_REMOVE_LOCKED(_d_rc, >bif_dlist, d, bpf_d, bd_next); > + > + if (SRPL_EMPTY_LOCKED(>bif_dlist)) { > + /* > + * Let the driver know that there are no more listeners. > + */ > + *bp->bif_driverp = NULL; > + } > + > + d->bd_bif = NULL; > + > /* >* Check if this descriptor had requested promiscuous mode. >* If so, turn it off. > @@ -305,19 +322,6 @@ bpf_detachd(struct bpf_d *d) >*/ > panic("bpf: ifpromisc failed"); > } > - > - /* Remove d from the interface's descriptor list. */ > - KERNEL_ASSERT_LOCKED(); > - SRPL_REMOVE_LOCKED(_d_rc, >bif_dlist, d, bpf_d, bd_next); > - > - if (SRPL_EMPTY_LOCKED(>bif_dlist)) { > - /* > - * Let the driver know that there are no more listeners. 
> - */ > - *d->bd_bif->bif_driverp = 0; > - } > - > - d->bd_bif = NULL; > } > > void > @@ -372,8 +376,7 @@ bpfclose(dev_t dev, int flag, int mode, > > d = bpfilter_lookup(minor(dev)); > s = splnet(); > - if (d->bd_bif) > - bpf_detachd(d); > + bpf_detachd(d); > bpf_wakeup(d); > LIST_REMOVE(d, bd_list); > bpf_put(d); > @@ -1033,12 +1036,10 @@ bpf_setif(struct bpf_d *d, struct ifreq > goto out; > } > if (candidate != d->bd_bif) { > - if (d->bd_bif) > - /* > - * Detach if attached to something else. > - */ > - bpf_detachd(d); > - > + /* > + * Detach if attached to something else. > + */ > + bpf_detachd(d); > bpf_attachd(d, candidate); > } > bpf_resetd(d);
Re: bpf_mtap(9) w/o KERNEL_LOCK
On 13/09/16(Tue) 12:23, Martin Pieuchot wrote: > Here's the big scary diff I've been using for some months now to stop > grabbing the KERNEL_LOCK() in bpf_mtap(9). This has been originally > written to prevent lock ordering inside pf_test(). Now that we're > heading toward using a rwlock, we won't have this problem, but fewer > usages of KERNEL_LOCK() is still interesting. > > I'm going to split this diff in small chunks to ease the review. But > I'd appreciate if people could try to break it, test & report back. > > Some notes: > > - Now that selwakeup() is called in a thread context (task) we only > rely on the KERNEL_LOCK() to serialize access to kqueue(9) data. > > - The reference counting is here to make sure a descriptor is not > freed during a sleep. That's why the KERNEL_LOCK() is still > necessary in the slow path. On the other hand bpf_catchpacket() > relies on the reference guaranteed by the SRP list. > > - A mutex now protects the rotating buffers and their associated > fields. It is dropped before calling ifpromisc() because USB > devices sleep. > > - The dance around uiomove(9) is here to check that buffers aren't > rotated while data is copied to userland. Setting ``b->bd_fbuf'' > to NULL should be enough to let bpf_catchpacket() to drop the > patcket. But I added ``__in_uiomove'' to be able to have usable > panic if something weird happen. Next extract diff that tweaks bpf_detachd(). The goal here is to remove ``d'' from the list and NULLify ``d->bd_bif'' before calling ifpromisc(). The reason is that ifpromisc() can sleep. Think USB ;) So we'll have to release the mutex before calling it. So we want to make sure ``d'' is no longer in the interface descriptor list at this point. ok? 
Index: net/bpf.c === RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.154 diff -u -p -r1.154 bpf.c --- net/bpf.c 21 Nov 2016 09:15:40 - 1.154 +++ net/bpf.c 22 Nov 2016 10:47:05 - @@ -288,6 +288,23 @@ bpf_detachd(struct bpf_d *d) struct bpf_if *bp; bp = d->bd_bif; + /* Not attached. */ + if (bp == NULL) + return; + + /* Remove ``d'' from the interface's descriptor list. */ + KERNEL_ASSERT_LOCKED(); + SRPL_REMOVE_LOCKED(_d_rc, >bif_dlist, d, bpf_d, bd_next); + + if (SRPL_EMPTY_LOCKED(>bif_dlist)) { + /* +* Let the driver know that there are no more listeners. +*/ + *bp->bif_driverp = NULL; + } + + d->bd_bif = NULL; + /* * Check if this descriptor had requested promiscuous mode. * If so, turn it off. @@ -305,19 +322,6 @@ bpf_detachd(struct bpf_d *d) */ panic("bpf: ifpromisc failed"); } - - /* Remove d from the interface's descriptor list. */ - KERNEL_ASSERT_LOCKED(); - SRPL_REMOVE_LOCKED(_d_rc, >bif_dlist, d, bpf_d, bd_next); - - if (SRPL_EMPTY_LOCKED(>bif_dlist)) { - /* -* Let the driver know that there are no more listeners. -*/ - *d->bd_bif->bif_driverp = 0; - } - - d->bd_bif = NULL; } void @@ -372,8 +376,7 @@ bpfclose(dev_t dev, int flag, int mode, d = bpfilter_lookup(minor(dev)); s = splnet(); - if (d->bd_bif) - bpf_detachd(d); + bpf_detachd(d); bpf_wakeup(d); LIST_REMOVE(d, bd_list); bpf_put(d); @@ -1033,12 +1036,10 @@ bpf_setif(struct bpf_d *d, struct ifreq goto out; } if (candidate != d->bd_bif) { - if (d->bd_bif) - /* -* Detach if attached to something else. -*/ - bpf_detachd(d); - + /* +* Detach if attached to something else. +*/ + bpf_detachd(d); bpf_attachd(d, candidate); } bpf_resetd(d);
Re: bpf_mtap(9) w/o KERNEL_LOCK
On Wed, Nov 16, 2016 at 12:18:48PM +0100, Martin Pieuchot wrote: > Here's another extracted diff: Use goto in read & write and always > increment the reference count in write. > > ok? OK bluhm@ > > Index: net/bpf.c > === > RCS file: /cvs/src/sys/net/bpf.c,v > retrieving revision 1.151 > diff -u -p -r1.151 bpf.c > --- net/bpf.c 16 Nov 2016 09:00:01 - 1.151 > +++ net/bpf.c 16 Nov 2016 11:08:25 - > @@ -406,16 +406,17 @@ bpfread(dev_t dev, struct uio *uio, int > if (d->bd_bif == NULL) > return (ENXIO); > > + s = splnet(); > + bpf_get(d); > + > /* >* Restrict application to use a buffer the same size as >* as kernel buffers. >*/ > - if (uio->uio_resid != d->bd_bufsize) > - return (EINVAL); > - > - s = splnet(); > - > - bpf_get(d); > + if (uio->uio_resid != d->bd_bufsize) { > + error = EINVAL; > + goto out; > + } > > /* >* If there's a timeout, bd_rdStart is tagged when we start the read. > @@ -431,13 +432,12 @@ bpfread(dev_t dev, struct uio *uio, int >* ends when the timeout expires or when enough packets >* have arrived to fill the store buffer. >*/ > - while (d->bd_hbuf == 0) { > + while (d->bd_hbuf == NULL) { > if (d->bd_bif == NULL) { > /* interface is gone */ > if (d->bd_slen == 0) { > - bpf_put(d); > - splx(s); > - return (EIO); > + error = EIO; > + goto out; > } > ROTATE_BUFFERS(d); > break; > @@ -461,18 +461,15 @@ bpfread(dev_t dev, struct uio *uio, int > } else > error = EWOULDBLOCK; > } > - if (error == EINTR || error == ERESTART) { > - bpf_put(d); > - splx(s); > - return (error); > - } > + if (error == EINTR || error == ERESTART) > + goto out; > if (error == EWOULDBLOCK) { > /* >* On a timeout, return what's in the buffer, >* which may be nothing. If there is something >* in the store buffer, we can rotate the buffers. 
>*/ > - if (d->bd_hbuf) > + if (d->bd_hbuf != NULL) > /* >* We filled up the buffer in between >* getting the timeout and arriving > @@ -481,9 +478,8 @@ bpfread(dev_t dev, struct uio *uio, int > break; > > if (d->bd_slen == 0) { > - bpf_put(d); > - splx(s); > - return (0); > + error = 0; > + goto out; > } > ROTATE_BUFFERS(d); > break; > @@ -505,7 +501,7 @@ bpfread(dev_t dev, struct uio *uio, int > d->bd_fbuf = d->bd_hbuf; > d->bd_hbuf = NULL; > d->bd_hlen = 0; > - > +out: > bpf_put(d); > splx(s); > > @@ -554,32 +550,40 @@ bpfwrite(dev_t dev, struct uio *uio, int > struct bpf_insn *fcode = NULL; > int error, s; > struct sockaddr_storage dst; > + u_int dlt; > > d = bpfilter_lookup(minor(dev)); > if (d->bd_bif == NULL) > return (ENXIO); > > + bpf_get(d); > ifp = d->bd_bif->bif_ifp; > > - if ((ifp->if_flags & IFF_UP) == 0) > - return (ENETDOWN); > + if ((ifp->if_flags & IFF_UP) == 0) { > + error = ENETDOWN; > + goto out; > + } > > - if (uio->uio_resid == 0) > - return (0); > + if (uio->uio_resid == 0) { > + error = 0; > + goto out; > + } > > KERNEL_ASSERT_LOCKED(); /* for accessing bd_wfilter */ > bf = srp_get_locked(>bd_wfilter); > if (bf != NULL) > fcode = bf->bf_insns; > > - error = bpf_movein(uio, d->bd_bif->bif_dlt, , > - (struct sockaddr *), fcode); > + dlt = d->bd_bif->bif_dlt; > + > + error = bpf_movein(uio, dlt, , (struct sockaddr *), fcode); > if (error) > - return (error); > + goto out; > > if (m->m_pkthdr.len > ifp->if_mtu) { > m_freem(m); > - return (EMSGSIZE); > + error = EMSGSIZE; > + goto out; > } > > m->m_pkthdr.ph_rtableid = ifp->if_rdomain; > @@ -591,9 +595,9 @@ bpfwrite(dev_t dev, struct uio *uio, int > s =
Re: bpf_mtap(9) w/o KERNEL_LOCK
On 13/09/16(Tue) 12:23, Martin Pieuchot wrote: > Here's the big scary diff I've been using for some months now to stop > grabbing the KERNEL_LOCK() in bpf_mtap(9). This has been originally > written to prevent lock ordering inside pf_test(). Now that we're > heading toward using a rwlock, we won't have this problem, but fewer > usages of KERNEL_LOCK() is still interesting. > > I'm going to split this diff in small chunks to ease the review. But > I'd appreciate if people could try to break it, test & report back. > > Some notes: > > - Now that selwakeup() is called in a thread context (task) we only > rely on the KERNEL_LOCK() to serialize access to kqueue(9) data. > > - The reference counting is here to make sure a descriptor is not > freed during a sleep. That's why the KERNEL_LOCK() is still > necessary in the slow path. On the other hand bpf_catchpacket() > relies on the reference guaranteed by the SRP list. > > - A mutex now protects the rotating buffers and their associated > fields. It is dropped before calling ifpromisc() because USB > devices sleep. > > - The dance around uiomove(9) is here to check that buffers aren't > rotated while data is copied to userland. Setting ``b->bd_fbuf'' > to NULL should be enough to let bpf_catchpacket() to drop the > patcket. But I added ``__in_uiomove'' to be able to have usable > panic if something weird happen. Here's another extracted diff: Use goto in read & write and always increment the reference count in write. ok? Index: net/bpf.c === RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.151 diff -u -p -r1.151 bpf.c --- net/bpf.c 16 Nov 2016 09:00:01 - 1.151 +++ net/bpf.c 16 Nov 2016 11:08:25 - @@ -406,16 +406,17 @@ bpfread(dev_t dev, struct uio *uio, int if (d->bd_bif == NULL) return (ENXIO); + s = splnet(); + bpf_get(d); + /* * Restrict application to use a buffer the same size as * as kernel buffers. 
*/ - if (uio->uio_resid != d->bd_bufsize) - return (EINVAL); - - s = splnet(); - - bpf_get(d); + if (uio->uio_resid != d->bd_bufsize) { + error = EINVAL; + goto out; + } /* * If there's a timeout, bd_rdStart is tagged when we start the read. @@ -431,13 +432,12 @@ bpfread(dev_t dev, struct uio *uio, int * ends when the timeout expires or when enough packets * have arrived to fill the store buffer. */ - while (d->bd_hbuf == 0) { + while (d->bd_hbuf == NULL) { if (d->bd_bif == NULL) { /* interface is gone */ if (d->bd_slen == 0) { - bpf_put(d); - splx(s); - return (EIO); + error = EIO; + goto out; } ROTATE_BUFFERS(d); break; @@ -461,18 +461,15 @@ bpfread(dev_t dev, struct uio *uio, int } else error = EWOULDBLOCK; } - if (error == EINTR || error == ERESTART) { - bpf_put(d); - splx(s); - return (error); - } + if (error == EINTR || error == ERESTART) + goto out; if (error == EWOULDBLOCK) { /* * On a timeout, return what's in the buffer, * which may be nothing. If there is something * in the store buffer, we can rotate the buffers. */ - if (d->bd_hbuf) + if (d->bd_hbuf != NULL) /* * We filled up the buffer in between * getting the timeout and arriving @@ -481,9 +478,8 @@ bpfread(dev_t dev, struct uio *uio, int break; if (d->bd_slen == 0) { - bpf_put(d); - splx(s); - return (0); + error = 0; + goto out; } ROTATE_BUFFERS(d); break; @@ -505,7 +501,7 @@ bpfread(dev_t dev, struct uio *uio, int d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; - +out: bpf_put(d); splx(s); @@ -554,32 +550,40 @@ bpfwrite(dev_t dev, struct uio *uio, int struct bpf_insn *fcode = NULL; int error, s; struct sockaddr_storage dst; +
Re: bpf_mtap(9) w/o KERNEL_LOCK
On Mon, Nov 14, 2016 at 10:07:30AM +0100, Martin Pieuchot wrote: > Here's another extracted diff to move forward. Let bpf_allocbufs() > fail when allocating memory, this way we can call it while holding > a mutex. > > ok? OK bluhm@ > > Index: net/bpf.c > === > RCS file: /cvs/src/sys/net/bpf.c,v > retrieving revision 1.150 > diff -u -p -r1.150 bpf.c > --- net/bpf.c 16 Oct 2016 18:05:41 - 1.150 > +++ net/bpf.c 14 Nov 2016 09:02:51 - > @@ -92,7 +92,7 @@ int bpf_maxbufsize = BPF_MAXBUFSIZE; > struct bpf_if*bpf_iflist; > LIST_HEAD(, bpf_d) bpf_d_list; > > -void bpf_allocbufs(struct bpf_d *); > +int bpf_allocbufs(struct bpf_d *); > void bpf_ifname(struct ifnet *, struct ifreq *); > int _bpf_mtap(caddr_t, const struct mbuf *, u_int, > void (*)(const void *, void *, size_t)); > @@ -996,6 +996,7 @@ int > bpf_setif(struct bpf_d *d, struct ifreq *ifr) > { > struct bpf_if *bp, *candidate = NULL; > + int error = 0; > int s; > > /* > @@ -1012,30 +1013,33 @@ bpf_setif(struct bpf_d *d, struct ifreq > candidate = bp; > } > > - if (candidate != NULL) { > - /* > - * Allocate the packet buffers if we need to. > - * If we're already attached to requested interface, > - * just flush the buffer. > - */ > - if (d->bd_sbuf == NULL) > - bpf_allocbufs(d); > - s = splnet(); > - if (candidate != d->bd_bif) { > - if (d->bd_bif) > - /* > - * Detach if attached to something else. > - */ > - bpf_detachd(d); > + /* Not found. */ > + if (candidate == NULL) > + return (ENXIO); > > - bpf_attachd(d, candidate); > - } > - bpf_reset_d(d); > - splx(s); > - return (0); > + /* > + * Allocate the packet buffers if we need to. > + * If we're already attached to requested interface, > + * just flush the buffer. > + */ > + s = splnet(); > + if (d->bd_sbuf == NULL) { > + if ((error = bpf_allocbufs(d))) > + goto out; > } > - /* Not found. */ > - return (ENXIO); > + if (candidate != d->bd_bif) { > + if (d->bd_bif) > + /* > + * Detach if attached to something else. 
> + */ > + bpf_detachd(d); > + > + bpf_attachd(d, candidate); > + } > + bpf_reset_d(d); > +out: > + splx(s); > + return (error); > } > > /* > @@ -1434,13 +1438,23 @@ bpf_catchpacket(struct bpf_d *d, u_char > /* > * Initialize all nonzero fields of a descriptor. > */ > -void > +int > bpf_allocbufs(struct bpf_d *d) > { > - d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); > - d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); > + d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); > + if (d->bd_fbuf == NULL) > + return (ENOMEM); > + > + d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); > + if (d->bd_sbuf == NULL) { > + free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize); > + return (ENOMEM); > + } > + > d->bd_slen = 0; > d->bd_hlen = 0; > + > + return (0); > } > > void
Re: bpf_mtap(9) w/o KERNEL_LOCK
On 13/09/16(Tue) 12:23, Martin Pieuchot wrote: > Here's the big scary diff I've been using for some months now to stop > grabbing the KERNEL_LOCK() in bpf_mtap(9). This has been originally > written to prevent lock ordering inside pf_test(). Now that we're > heading toward using a rwlock, we won't have this problem, but fewer > usages of KERNEL_LOCK() is still interesting. > > I'm going to split this diff in small chunks to ease the review. But > I'd appreciate if people could try to break it, test & report back. > > Some notes: > > - Now that selwakeup() is called in a thread context (task) we only > rely on the KERNEL_LOCK() to serialize access to kqueue(9) data. > > - The reference counting is here to make sure a descriptor is not > freed during a sleep. That's why the KERNEL_LOCK() is still > necessary in the slow path. On the other hand bpf_catchpacket() > relies on the reference guaranteed by the SRP list. > > - A mutex now protects the rotating buffers and their associated > fields. It is dropped before calling ifpromisc() because USB > devices sleep. > > - The dance around uiomove(9) is here to check that buffers aren't > rotated while data is copied to userland. Setting ``b->bd_fbuf'' > to NULL should be enough to let bpf_catchpacket() drop the > packet. But I added ``__in_uiomove'' to be able to have a usable > panic if something weird happens. > > Comments? I've got many test reports but no actual review... anyone? Here's another extracted diff to move forward. Let bpf_allocbufs() fail when allocating memory, this way we can call it while holding a mutex. ok? 
Index: net/bpf.c === RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.150 diff -u -p -r1.150 bpf.c --- net/bpf.c 16 Oct 2016 18:05:41 - 1.150 +++ net/bpf.c 14 Nov 2016 09:02:51 - @@ -92,7 +92,7 @@ int bpf_maxbufsize = BPF_MAXBUFSIZE; struct bpf_if *bpf_iflist; LIST_HEAD(, bpf_d) bpf_d_list; -void bpf_allocbufs(struct bpf_d *); +intbpf_allocbufs(struct bpf_d *); void bpf_ifname(struct ifnet *, struct ifreq *); int_bpf_mtap(caddr_t, const struct mbuf *, u_int, void (*)(const void *, void *, size_t)); @@ -996,6 +996,7 @@ int bpf_setif(struct bpf_d *d, struct ifreq *ifr) { struct bpf_if *bp, *candidate = NULL; + int error = 0; int s; /* @@ -1012,30 +1013,33 @@ bpf_setif(struct bpf_d *d, struct ifreq candidate = bp; } - if (candidate != NULL) { - /* -* Allocate the packet buffers if we need to. -* If we're already attached to requested interface, -* just flush the buffer. -*/ - if (d->bd_sbuf == NULL) - bpf_allocbufs(d); - s = splnet(); - if (candidate != d->bd_bif) { - if (d->bd_bif) - /* -* Detach if attached to something else. -*/ - bpf_detachd(d); + /* Not found. */ + if (candidate == NULL) + return (ENXIO); - bpf_attachd(d, candidate); - } - bpf_reset_d(d); - splx(s); - return (0); + /* +* Allocate the packet buffers if we need to. +* If we're already attached to requested interface, +* just flush the buffer. +*/ + s = splnet(); + if (d->bd_sbuf == NULL) { + if ((error = bpf_allocbufs(d))) + goto out; } - /* Not found. */ - return (ENXIO); + if (candidate != d->bd_bif) { + if (d->bd_bif) + /* +* Detach if attached to something else. +*/ + bpf_detachd(d); + + bpf_attachd(d, candidate); + } + bpf_reset_d(d); +out: + splx(s); + return (error); } /* @@ -1434,13 +1438,23 @@ bpf_catchpacket(struct bpf_d *d, u_char /* * Initialize all nonzero fields of a descriptor. 
*/ -void +int bpf_allocbufs(struct bpf_d *d) { - d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); - d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); + d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); + if (d->bd_fbuf == NULL) + return (ENOMEM); + + d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); + if (d->bd_sbuf == NULL) { + free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize); + return (ENOMEM); + } + d->bd_slen = 0; d->bd_hlen = 0; + + return (0); } void
bpf_mtap(9) w/o KERNEL_LOCK
Here's the big scary diff I've been using for some months now to stop grabbing the KERNEL_LOCK() in bpf_mtap(9). This has been originally written to prevent lock ordering inside pf_test(). Now that we're heading toward using a rwlock, we won't have this problem, but fewer usages of KERNEL_LOCK() is still interesting. I'm going to split this diff in small chunks to ease the review. But I'd appreciate if people could try to break it, test & report back. Some notes: - Now that selwakeup() is called in a thread context (task) we only rely on the KERNEL_LOCK() to serialize access to kqueue(9) data. - The reference counting is here to make sure a descriptor is not freed during a sleep. That's why the KERNEL_LOCK() is still necessary in the slow path. On the other hand bpf_catchpacket() relies on the reference guaranteed by the SRP list. - A mutex now protects the rotating buffers and their associated fields. It is dropped before calling ifpromisc() because USB devices sleep. - The dance around uiomove(9) is here to check that buffers aren't rotated while data is copied to userland. Setting ``b->bd_fbuf'' to NULL should be enough to let bpf_catchpacket() drop the packet. But I added ``__in_uiomove'' to be able to have a usable panic if something weird happens. Comments? 
Index: net/bpf.c === RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.149 diff -u -p -r1.149 bpf.c --- net/bpf.c 12 Sep 2016 16:24:37 - 1.149 +++ net/bpf.c 13 Sep 2016 09:56:18 - @@ -92,15 +92,13 @@ int bpf_maxbufsize = BPF_MAXBUFSIZE; struct bpf_if *bpf_iflist; LIST_HEAD(, bpf_d) bpf_d_list; -void bpf_allocbufs(struct bpf_d *); +intbpf_allocbufs(struct bpf_d *); void bpf_ifname(struct ifnet *, struct ifreq *); int_bpf_mtap(caddr_t, const struct mbuf *, u_int, void (*)(const void *, void *, size_t)); void bpf_mcopy(const void *, void *, size_t); intbpf_movein(struct uio *, u_int, struct mbuf **, struct sockaddr *, struct bpf_insn *); -void bpf_attachd(struct bpf_d *, struct bpf_if *); -void bpf_detachd(struct bpf_d *); intbpf_setif(struct bpf_d *, struct ifreq *); intbpfpoll(dev_t, int, struct proc *); intbpfkqfilter(dev_t, struct knote *); @@ -108,7 +106,6 @@ voidbpf_wakeup(struct bpf_d *); void bpf_wakeup_cb(void *); void bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t, void (*)(const void *, void *, size_t), struct timeval *); -void bpf_reset_d(struct bpf_d *); intbpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); intbpf_setdlt(struct bpf_d *, u_int); @@ -120,6 +117,13 @@ intbpf_sysctl_locked(int *, u_int, void struct bpf_d *bpfilter_lookup(int); /* + * Called holding ``bd_mtx''. + */ +void bpf_attachd(struct bpf_d *, struct bpf_if *); +void bpf_detachd(struct bpf_d *); +void bpf_resetd(struct bpf_d *); + +/* * Reference count access to descriptor buffers */ void bpf_get(struct bpf_d *); @@ -259,11 +263,12 @@ bpf_movein(struct uio *uio, u_int linkty /* * Attach file to the bpf interface, i.e. make d listen on bp. - * Must be called at splnet. */ void bpf_attachd(struct bpf_d *d, struct bpf_if *bp) { + MUTEX_ASSERT_LOCKED(>bd_mtx); + /* * Point d at bp, and add d to the interface's list of listeners. 
* Finally, point the driver's bpf cookie at the interface so @@ -286,7 +291,23 @@ bpf_detachd(struct bpf_d *d) { struct bpf_if *bp; + MUTEX_ASSERT_LOCKED(>bd_mtx); + bp = d->bd_bif; + + /* Remove d from the interface's descriptor list. */ + KERNEL_ASSERT_LOCKED(); + SRPL_REMOVE_LOCKED(_d_rc, >bif_dlist, d, bpf_d, bd_next); + + if (SRPL_EMPTY_LOCKED(>bif_dlist)) { + /* +* Let the driver know that there are no more listeners. +*/ + *bp->bif_driverp = NULL; + } + + d->bd_bif = NULL; + /* * Check if this descriptor had requested promiscuous mode. * If so, turn it off. @@ -295,7 +316,13 @@ bpf_detachd(struct bpf_d *d) int error; d->bd_promisc = 0; + + bpf_get(d); + mtx_leave(>bd_mtx); error = ifpromisc(bp->bif_ifp, 0); + mtx_enter(>bd_mtx); + bpf_put(d); + if (error && !(error == EINVAL || error == ENODEV)) /* * Something is really wrong if we were able to put @@ -304,19 +331,6 @@ bpf_detachd(struct bpf_d *d) */ panic("bpf: ifpromisc failed"); } - - /* Remove d from the interface's descriptor list. */ - KERNEL_ASSERT_LOCKED(); - SRPL_REMOVE_LOCKED(_d_rc, >bif_dlist, d, bpf_d, bd_next); - - if (SRPL_EMPTY_LOCKED(>bif_dlist)) {