I'd like to use a write lock to serialize accesses to ip_output(). This will be used to guarantee that atomic code sections in the socket layer stay atomic when the input/forwarding path won't run under KERNEL_LOCK().
For such purpose I'll have to convert some tsleep(9) to an msleep(9)-like function operating on a write lock. That's why I'd like to introduce rwsleep(9). I did not bother exporting a read variant of this function since I don't need it for the moment. ok? Index: sys/kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.134 diff -u -p -r1.134 kern_synch.c --- sys/kern/kern_synch.c 3 Sep 2016 15:06:06 -0000 1.134 +++ sys/kern/kern_synch.c 12 Sep 2016 08:41:23 -0000 @@ -226,6 +226,40 @@ msleep(const volatile void *ident, struc return (error); } +/* + * Same as tsleep, but if we have a rwlock provided, then once we've + * entered the sleep queue we drop the it. After sleeping we re-lock. + */ +int +rwsleep(const volatile void *ident, struct rwlock *wl, int priority, + const char *wmesg, int timo) +{ + struct sleep_state sls; + int error, error1; + + KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); + rw_assert_wrlock(wl); + + sleep_setup(&sls, ident, priority, wmesg); + sleep_setup_timeout(&sls, timo); + sleep_setup_signal(&sls, priority); + + rw_exit_write(wl); + + sleep_finish(&sls, 1); + error1 = sleep_finish_timeout(&sls); + error = sleep_finish_signal(&sls); + + if ((priority & PNORELOCK) == 0) + rw_enter_write(wl); + + /* Signal errors are higher priority than timeouts. */ + if (error == 0 && error1 != 0) + error = error1; + + return (error); +} + void sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio, const char *wmesg) Index: sys/sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.116 diff -u -p -r1.116 systm.h --- sys/sys/systm.h 4 Sep 2016 09:22:29 -0000 1.116 +++ sys/sys/systm.h 12 Sep 2016 08:35:26 -0000 @@ -246,11 +246,13 @@ int sleep_finish_signal(struct sleep_sta void sleep_queue_init(void); struct mutex; +struct rwlock; void wakeup_n(const volatile void *, int); void wakeup(const volatile void *); #define wakeup_one(c) wakeup_n((c), 1) int tsleep(const volatile void *, int, const char *, int); int msleep(const volatile void *, struct mutex *, int, const char*, int); +int rwsleep(const volatile void *, struct rwlock *, int, const char *, int); void yield(void); void wdog_register(int (*)(void *, int), void *); Index: share/man/man9/tsleep.9 =================================================================== RCS file: /cvs/src/share/man/man9/tsleep.9,v retrieving revision 1.10 diff -u -p -r1.10 tsleep.9 --- share/man/man9/tsleep.9 14 Sep 2015 15:14:55 -0000 1.10 +++ share/man/man9/tsleep.9 12 Sep 2016 08:42:55 -0000 @@ -34,6 +34,7 @@ .Sh NAME .Nm tsleep , .Nm msleep , +.Nm rwsleep , .Nm wakeup , .Nm wakeup_n , .Nm wakeup_one @@ -45,6 +46,8 @@ .Fn tsleep "void *ident" "int priority" "const char *wmesg" "int timo" .Ft int .Fn msleep "void *ident" "struct mutex *mtx" "int priority" "const char *wmesg" "int timo" +.Ft int +.Fn rwsleep "void *ident" "struct rwlock *wl" "int priority" "const char *wmesg" "int timo" .Ft void .Fn wakeup "void *ident" .Ft void @@ -53,9 +56,10 @@ .Fn wakeup_one "void *ident" .Sh DESCRIPTION These functions implement voluntary context switching. -.Fn tsleep -and +.Fn tsleep , .Fn msleep +and +.Fn rwsleep are used throughout the kernel whenever processing in the current context cannot continue for any of the following reasons: .Bl -bullet -offset indent @@ -146,6 +150,22 @@ argument. .El .Pp The +.Fn rwsleep +function behaves just like +.Fn tsleep , +but takes an additional argument: +.Bl -tag -width priority +.It Fa wl +A write lock that will be unlocked when the process is safely +on the sleep queue. +The write lock will be relocked at the end of rwsleep unless the +.Dv PNORELOCK +flag is set in the +.Fa priority +argument. +.El +.Pp +The .Fn wakeup function will mark all processes which are currently sleeping on the identifier .Fa ident @@ -173,9 +193,10 @@ except that only .Fa count or one process, respectively, is marked runnable. .Sh RETURN VALUES -.Fn tsleep -and +.Fn tsleep , .Fn msleep +and +.Fn rwsleep return 0 if they return as a result of a .Fn wakeup . If they return as a result of a signal, the return value is