I'd like to make sure that bpf_tap(9) does not grab the KERNEL_LOCK().
The reason is to reduce the potential lock ordering problems within PF.

I'm currently using a mutex to serialize buffer changes, but since
bpf_wakeup() will still need the KERNEL_LOCK(), I'm using a task for
that.

Diff below only introduce this task.  I'd like to commit it first to
make sure possible regressions are fixed before playing with the mutex
diff.

Note that an acceptable race can occur with this diff.  The value of
``bd_async'' and/or ``bd_sig'' might change between the bpf_wakeup()
call and the check.

ok?

Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.147
diff -u -p -r1.147 bpf.c
--- net/bpf.c   15 Aug 2016 07:20:14 -0000      1.147
+++ net/bpf.c   15 Aug 2016 13:55:04 -0000
@@ -57,6 +57,8 @@
 #include <sys/atomic.h>
 #include <sys/srp.h>
 #include <sys/specdev.h>
+#include <sys/selinfo.h>
+#include <sys/task.h>
 
 #include <net/if.h>
 #include <net/bpf.h>
@@ -103,6 +105,7 @@ int bpf_setif(struct bpf_d *, struct ifr
 int    bpfpoll(dev_t, int, struct proc *);
 int    bpfkqfilter(dev_t, struct knote *);
 void   bpf_wakeup(struct bpf_d *);
+void   bpf_wakeup_cb(void *);
 void   bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
            void (*)(const void *, void *, size_t), struct timeval *);
 void   bpf_reset_d(struct bpf_d *);
@@ -345,6 +348,7 @@ bpfopen(dev_t dev, int flag, int mode, s
        bd->bd_unit = unit;
        bd->bd_bufsize = bpf_bufsize;
        bd->bd_sig = SIGIO;
+       task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
 
        if (flag & FNONBLOCK)
                bd->bd_rtout = -1;
@@ -515,12 +519,28 @@ bpfread(dev_t dev, struct uio *uio, int 
 void
 bpf_wakeup(struct bpf_d *d)
 {
-       wakeup((caddr_t)d);
+       /*
+        * As long as csignal() and selwakeup() need to be protected
+        * by the KERNEL_LOCK() we have to delay the wakeup to
+        * another context to keep the hot path KERNEL_LOCK()-free.
+        */
+       bpf_get(d);
+       task_add(systq, &d->bd_wake_task);
+}
+
+void
+bpf_wakeup_cb(void *xd)
+{
+       struct bpf_d *d = xd;
+
+       KERNEL_ASSERT_LOCKED();
+
+       wakeup(d);
        if (d->bd_async && d->bd_sig)
-               csignal(d->bd_pgid, d->bd_sig,
-                   d->bd_siguid, d->bd_sigeuid);
+               csignal(d->bd_pgid, d->bd_sig, d->bd_siguid, d->bd_sigeuid);
 
        selwakeup(&d->bd_sel);
+       bpf_put(d);
 }
 
 int
Index: net/bpfdesc.h
===================================================================
RCS file: /cvs/src/sys/net/bpfdesc.h,v
retrieving revision 1.30
diff -u -p -r1.30 bpfdesc.h
--- net/bpfdesc.h       30 Mar 2016 12:33:10 -0000      1.30
+++ net/bpfdesc.h       15 Aug 2016 13:55:04 -0000
@@ -42,8 +42,6 @@
 
 #ifdef _KERNEL
 
-#include <sys/selinfo.h>
-
 /*
  * Descriptor associated with each open bpf file.
  */
@@ -90,6 +88,8 @@ struct bpf_d {
        struct selinfo  bd_sel;         /* bsd select info */
        int             bd_unit;        /* logical unit number */
        LIST_ENTRY(bpf_d) bd_list;      /* descriptor list */
+
+       struct task     bd_wake_task;   /* delay csignal() and selwakeup() */
 };
 
 /*

Reply via email to