It seems that most of the problems exposed by the introduction of the
NET_LOCK() are related to the non-recursive nature of the rwlock.  Some
known issues involve pflow(4), cloning interfaces and NFS.

Diff below makes use of a recursive-rwlock instead.  I just finished a
build on NFS with it, so I'd like to know if it works for your use case.

Please test and report back.

Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.27
diff -u -p -r1.27 kern_rwlock.c
--- kern/kern_rwlock.c  14 Mar 2015 07:33:42 -0000      1.27
+++ kern/kern_rwlock.c  3 Jan 2017 14:23:56 -0000
@@ -346,3 +346,23 @@ rrw_status(struct rrwlock *rrwl)
 {
        return (rw_status(&rrwl->rrwl_lock));
 }
+
+int
+rrw_release_all(struct rrwlock *rrwl)
+{
+       int rv;
+
+       rv = rrwl->rrwl_wcnt;
+       KASSERT(rv >= 1);
+       rrwl->rrwl_wcnt = 1;
+       rrw_exit(rrwl);
+
+       return (rv);
+}
+
+void
+rrw_acquire_count(struct rrwlock *rrwl, int count, int flags)
+{
+       while (count--)
+               rrw_enter(rrwl, flags);
+}
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.135
diff -u -p -r1.135 kern_synch.c
--- kern/kern_synch.c   13 Sep 2016 08:32:44 -0000      1.135
+++ kern/kern_synch.c   3 Jan 2017 14:07:23 -0000
@@ -231,27 +231,28 @@ msleep(const volatile void *ident, struc
  * entered the sleep queue we drop the it. After sleeping we re-lock.
  */
 int
-rwsleep(const volatile void *ident, struct rwlock *wl, int priority,
+rrwsleep(const volatile void *ident, struct rrwlock *rrwl, int priority,
     const char *wmesg, int timo)
 {
        struct sleep_state sls;
        int error, error1;
+       int count;
 
        KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
-       rw_assert_wrlock(wl);
+       KASSERT(rrw_status(rrwl) == RW_WRITE);
 
        sleep_setup(&sls, ident, priority, wmesg);
        sleep_setup_timeout(&sls, timo);
        sleep_setup_signal(&sls, priority);
 
-       rw_exit_write(wl);
+       count = rrw_release_all(rrwl);
 
        sleep_finish(&sls, 1);
        error1 = sleep_finish_timeout(&sls);
        error = sleep_finish_signal(&sls);
 
        if ((priority & PNORELOCK) == 0)
-               rw_enter_write(wl);
+               rrw_acquire_count(rrwl, count, RW_WRITE);
 
        /* Signal errors are higher priority than timeouts. */
        if (error == 0 && error1 != 0)
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.171
diff -u -p -r1.171 uipc_socket.c
--- kern/uipc_socket.c  29 Dec 2016 12:12:43 -0000      1.171
+++ kern/uipc_socket.c  3 Jan 2017 14:04:42 -0000
@@ -256,7 +256,7 @@ soclose(struct socket *so)
                            (so->so_state & SS_NBIO))
                                goto drop;
                        while (so->so_state & SS_ISCONNECTED) {
-                               error = tsleep(&so->so_timeo,
+                               error = rrwsleep(&so->so_timeo, &netlock,
                                    PSOCK | PCATCH, "netcls",
                                    so->so_linger * hz);
                                if (error)
@@ -1039,7 +1039,7 @@ sorflush(struct socket *so)
        struct sockbuf asb;
 
        sb->sb_flags |= SB_NOINTR;
-       (void) sblock(sb, M_WAITOK, NULL);
+       (void) sblock(sb, M_WAITOK, &netlock);
        socantrcvmore(so);
        sbunlock(sb);
        asb = *sb;
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.70
diff -u -p -r1.70 uipc_socket2.c
--- kern/uipc_socket2.c 29 Dec 2016 12:12:43 -0000      1.70
+++ kern/uipc_socket2.c 3 Jan 2017 14:06:54 -0000
@@ -53,7 +53,7 @@ u_long        sb_max = SB_MAX;                /* patchable */
 extern struct pool mclpools[];
 extern struct pool mbpool;
 
-int sbsleep(struct sockbuf *, struct rwlock *);
+int sbsleep(struct sockbuf *, struct rrwlock *);
 
 /*
  * Procedures to manipulate state flags of socket
@@ -276,18 +276,18 @@ sbwait(struct sockbuf *sb)
        NET_ASSERT_LOCKED();
 
        sb->sb_flagsintr |= SB_WAIT;
-       return (tsleep(&sb->sb_cc,
+       return (rrwsleep(&sb->sb_cc, &netlock,
            (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
            sb->sb_timeo));
 }
 
 int
-sbsleep(struct sockbuf *sb, struct rwlock *lock)
+sbsleep(struct sockbuf *sb, struct rrwlock *rrwl)
 {
        int error, prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
 
-       if (lock != NULL)
-               error = rwsleep(&sb->sb_flags, lock, prio, "netlck", 0);
+       if (rrwl != NULL)
+               error = rrwsleep(&sb->sb_flags, rrwl, prio, "netlck", 0);
        else
                error = tsleep(&sb->sb_flags, prio, "netlck", 0);
 
@@ -295,7 +295,7 @@ sbsleep(struct sockbuf *sb, struct rwloc
 }
 
 int
-sblock(struct sockbuf *sb, int wait, struct rwlock *lock)
+sblock(struct sockbuf *sb, int wait, struct rrwlock *rrwl)
 {
        int error;
 
@@ -310,7 +310,7 @@ sblock(struct sockbuf *sb, int wait, str
 
        while (sb->sb_flags & SB_LOCK) {
                sb->sb_flags |= SB_WANT;
-               error = sbsleep(sb, lock);
+               error = sbsleep(sb, rrwl);
                if (error)
                        return (error);
        }
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.144
diff -u -p -r1.144 uipc_syscalls.c
--- kern/uipc_syscalls.c        29 Dec 2016 12:12:43 -0000      1.144
+++ kern/uipc_syscalls.c        3 Jan 2017 14:04:26 -0000
@@ -296,7 +296,7 @@ redo:
                        head->so_error = ECONNABORTED;
                        break;
                }
-               error = tsleep(&head->so_timeo, PSOCK | PCATCH,
+               error = rrwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH,
                    "netcon", 0);
                if (error) {
                        goto bad;
@@ -436,7 +436,7 @@ sys_connect(struct proc *p, void *v, reg
        }
        NET_LOCK(s);
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-               error = tsleep(&so->so_timeo, PSOCK | PCATCH,
+               error = rrwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH,
                    "netcon2", 0);
                if (error) {
                        if (error == EINTR || error == ERESTART)
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.470
diff -u -p -r1.470 if.c
--- net/if.c    3 Jan 2017 13:11:55 -0000       1.470
+++ net/if.c    3 Jan 2017 14:03:51 -0000
@@ -229,6 +229,12 @@ struct taskq       *softnettq;
 
 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
 
+
+/*
+ * Serialize socket operations to ensure no new sleeping points
+ * are introduced in IP output paths.
+ */
+struct rrwlock netlock = RRWLOCK_INITIALIZER("netlock");
 /*
  * Network interface utility routines.
  */
Index: sys/rwlock.h
===================================================================
RCS file: /cvs/src/sys/sys/rwlock.h,v
retrieving revision 1.20
diff -u -p -r1.20 rwlock.h
--- sys/rwlock.h        21 Sep 2016 10:19:13 -0000      1.20
+++ sys/rwlock.h        3 Jan 2017 14:03:32 -0000
@@ -114,6 +114,8 @@ void        rw_assert_unlocked(struct rwlock *)
 #define rw_assert_unlocked(rwl)        ((void)0)
 #endif
 
+#define RRWLOCK_INITIALIZER(name)      { RWLOCK_INITIALIZER(name), 0 }
+
 int    rw_enter(struct rwlock *, int);
 void   rw_exit(struct rwlock *);
 int    rw_status(struct rwlock *);
@@ -122,6 +124,8 @@ void        rrw_init(struct rrwlock *, char *);
 int    rrw_enter(struct rrwlock *, int);
 void   rrw_exit(struct rrwlock *);
 int    rrw_status(struct rrwlock *);
+int    rrw_release_all(struct rrwlock *);
+void   rrw_acquire_count(struct rrwlock *, int, int);
 
 #endif /* _KERNEL */
 
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.67
diff -u -p -r1.67 socketvar.h
--- sys/socketvar.h     19 Dec 2016 08:36:50 -0000      1.67
+++ sys/socketvar.h     3 Jan 2017 14:05:59 -0000
@@ -222,7 +222,7 @@ struct rwlock;
  * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
  * Returns error without lock if sleep is interrupted.
  */
-int sblock(struct sockbuf *, int, struct rwlock *);
+int sblock(struct sockbuf *, int, struct rrwlock *);
 
 /* release lock on sockbuf sb */
 void sbunlock(struct sockbuf *);
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.121
diff -u -p -r1.121 systm.h
--- sys/systm.h 29 Dec 2016 12:12:44 -0000      1.121
+++ sys/systm.h 3 Jan 2017 14:07:33 -0000
@@ -247,13 +247,14 @@ int       sleep_finish_signal(struct sleep_sta
 void   sleep_queue_init(void);
 
 struct mutex;
-struct rwlock;
+struct rrwlock;
 void    wakeup_n(const volatile void *, int);
 void    wakeup(const volatile void *);
 #define wakeup_one(c) wakeup_n((c), 1)
 int    tsleep(const volatile void *, int, const char *, int);
 int    msleep(const volatile void *, struct mutex *, int,  const char*, int);
-int    rwsleep(const volatile void *, struct rwlock *, int, const char *, int);
+int    rrwsleep(const volatile void *, struct rrwlock *, int, const char *,
+           int);
 void   yield(void);
 
 void   wdog_register(int (*)(void *, int), void *);
@@ -291,18 +292,26 @@ int       uiomove(void *, size_t, struct uio *
 
 #if defined(_KERNEL)
 
+#include <sys/rwlock.h>
+
+extern struct rrwlock netlock;
+
 #define        NET_LOCK(s)                                                     
\
 do {                                                                   \
+       rrw_enter(&netlock, RW_WRITE);                                  \
        s = splsoftnet();                                               \
 } while (0)
 
 #define        NET_UNLOCK(s)                                                   
\
 do {                                                                   \
        splx(s);                                                        \
+       rrw_exit(&netlock);                                             \
 } while (0)
 
 #define        NET_ASSERT_LOCKED()                                             
\
 do {                                                                   \
+       if (rrw_status(&netlock) != RW_WRITE)                           \
+               splassert_fail(RW_WRITE, rrw_status(&netlock), __func__);\
        splsoftassert(IPL_SOFTNET);                                     \
 } while (0)
 

Reply via email to