I'd like to make sure that bpf_tap(9) does not grab the KERNEL_LOCK().
The reason is to reduce the potential lock ordering problems within PF.
I'm currently using a mutex to serialize buffer changes, but since
bpf_wakeup() will still need the KERNEL_LOCK(), I'm using a task for
that.
Diff below only introduce this task. I'd like to commit it first to
make sure possible regressions are fixed before playing with the mutex
diff.
Note that an acceptable race can occur with this diff. The value of
``bd_async'' and/or ``bd_sig'' might change between the bpf_wakeup()
call and the check.
ok?
Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.147
diff -u -p -r1.147 bpf.c
--- net/bpf.c 15 Aug 2016 07:20:14 -0000 1.147
+++ net/bpf.c 15 Aug 2016 13:55:04 -0000
@@ -57,6 +57,8 @@
#include <sys/atomic.h>
#include <sys/srp.h>
#include <sys/specdev.h>
+#include <sys/selinfo.h>
+#include <sys/task.h>
#include <net/if.h>
#include <net/bpf.h>
@@ -103,6 +105,7 @@ int bpf_setif(struct bpf_d *, struct ifr
int bpfpoll(dev_t, int, struct proc *);
int bpfkqfilter(dev_t, struct knote *);
void bpf_wakeup(struct bpf_d *);
+void bpf_wakeup_cb(void *);
void bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
void (*)(const void *, void *, size_t), struct timeval *);
void bpf_reset_d(struct bpf_d *);
@@ -345,6 +348,7 @@ bpfopen(dev_t dev, int flag, int mode, s
bd->bd_unit = unit;
bd->bd_bufsize = bpf_bufsize;
bd->bd_sig = SIGIO;
+ task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
if (flag & FNONBLOCK)
bd->bd_rtout = -1;
@@ -515,12 +519,28 @@ bpfread(dev_t dev, struct uio *uio, int
void
bpf_wakeup(struct bpf_d *d)
{
- wakeup((caddr_t)d);
+ /*
+ * As long as csignal() and selwakeup() need to be protected
+ * by the KERNEL_LOCK() we have to delay the wakeup to
+ * another context to keep the hot path KERNEL_LOCK()-free.
+ */
+ bpf_get(d);
+ task_add(systq, &d->bd_wake_task);
+}
+
+void
+bpf_wakeup_cb(void *xd)
+{
+ struct bpf_d *d = xd;
+
+ KERNEL_ASSERT_LOCKED();
+
+ wakeup(d);
if (d->bd_async && d->bd_sig)
- csignal(d->bd_pgid, d->bd_sig,
- d->bd_siguid, d->bd_sigeuid);
+ csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);
selwakeup(&d->bd_sel);
+ bpf_put(d);
}
int
Index: net/bpfdesc.h
===================================================================
RCS file: /cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.30
diff -u -p -r1.30 bpfdesc.h
--- net/bpfdesc.h 30 Mar 2016 12:33:10 -0000 1.30
+++ net/bpfdesc.h 15 Aug 2016 13:55:04 -0000
@@ -42,8 +42,6 @@
#ifdef _KERNEL
-#include <sys/selinfo.h>
-
/*
* Descriptor associated with each open bpf file.
*/
@@ -90,6 +88,8 @@ struct bpf_d {
struct selinfo bd_sel; /* bsd select info */
int bd_unit; /* logical unit number */
LIST_ENTRY(bpf_d) bd_list; /* descriptor list */
+
+ struct task bd_wake_task; /* delay csignal() and selwakeup() */
};
/*