This diff sets klist lock for sockets. Since sockets use custom lock functions, the diff introduces socket_klistops for use with the klist interface.
In soo_kqfilter(), the diff adds socket locking for accessing so_options and changing the klist. The patch replaces the use of the SB_KNOTE flag with a klist_empty() predicate in sb_notify(). The flag is just a roundabout way of determining whether the klist has elements. The removal of the flag makes sb_flagsintr redundant. As a result of this klist change, it becomes more explicit that the socket has to be locked when calling selwakeup() on sb_sel. If the lock is not held, the lock assert in knote() will trigger. The socket-related f_event routines already expect the lock when NOTE_SUBMIT is set in the hints parameter. OK? Index: sys/kern/uipc_socket.c =================================================================== RCS file: src/sys/kern/uipc_socket.c,v retrieving revision 1.252 diff -u -p -r1.252 uipc_socket.c --- sys/kern/uipc_socket.c 25 Dec 2020 12:59:52 -0000 1.252 +++ sys/kern/uipc_socket.c 5 Jan 2021 16:46:42 -0000 @@ -150,6 +150,8 @@ socreate(int dom, struct socket **aso, i if (prp->pr_type != type) return (EPROTOTYPE); so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO); + klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so); + klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so); sigio_init(&so->so_sigio); TAILQ_INIT(&so->so_q0); TAILQ_INIT(&so->so_q); @@ -239,6 +241,8 @@ sofree(struct socket *so, int s) } } sigio_free(&so->so_sigio); + klist_free(&so->so_rcv.sb_sel.si_note); + klist_free(&so->so_snd.sb_sel.si_note); #ifdef SOCKET_SPLICE if (so->so_sp) { if (issplicedback(so)) { @@ -2017,9 +2021,9 @@ soo_kqfilter(struct file *fp, struct kno { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; + int s; - KERNEL_ASSERT_LOCKED(); - + s = solock(so); switch (kn->kn_filter) { case EVFILT_READ: if (so->so_options & SO_ACCEPTCONN) @@ -2037,11 +2041,12 @@ soo_kqfilter(struct file *fp, struct kno sb = &so->so_rcv; break; default: + sounlock(so, s); return (EINVAL); } klist_insert_locked(&sb->sb_sel.si_note, kn); - sb->sb_flagsintr |= SB_KNOTE; + sounlock(so, s); return (0); } @@ -2051,11 +2056,7 @@ filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - KERNEL_ASSERT_LOCKED(); - - klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn); - if (klist_empty(&so->so_rcv.sb_sel.si_note)) - so->so_rcv.sb_flagsintr &= ~SB_KNOTE; + klist_remove(&so->so_rcv.sb_sel.si_note, kn); } int @@ -2104,11 +2105,7 @@ filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; - KERNEL_ASSERT_LOCKED(); - - klist_remove_locked(&so->so_snd.sb_sel.si_note, kn); - if (klist_empty(&so->so_snd.sb_sel.si_note)) - so->so_snd.sb_flagsintr &= ~SB_KNOTE; + klist_remove(&so->so_snd.sb_sel.si_note, kn); } int @@ -2159,6 +2156,36 @@ filt_solisten(struct knote *kn, long hin return (kn->kn_data != 0); } +static void +klist_soassertlk(void *arg) +{ + struct socket *so = arg; + + soassertlocked(so); +} + +static int +klist_solock(void *arg) +{ + struct socket *so = arg; + + return (solock(so)); +} + +static void +klist_sounlock(void *arg, int ls) +{ + struct socket *so = arg; + + sounlock(so, ls); +} + +const struct klistops socket_klistops = { + .klo_assertlk = klist_soassertlk, + .klo_lock = klist_solock, + .klo_unlock = klist_sounlock, +}; + #ifdef DDB void sobuf_print(struct sockbuf *, @@ -2179,7 +2206,6 @@ sobuf_print(struct sockbuf *sb, (*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail); (*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord); (*pr)("\tsb_sel: ...\n"); - (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr); (*pr)("\tsb_flags: %i\n", sb->sb_flags); (*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs); } Index: sys/kern/uipc_socket2.c =================================================================== RCS file: src/sys/kern/uipc_socket2.c,v retrieving revision 1.104 diff -u -p -r1.104 uipc_socket2.c --- sys/kern/uipc_socket2.c 11 Apr 2020 14:07:06 -0000 1.104 +++ sys/kern/uipc_socket2.c 5 Jan 2021 16:46:42 -0000 @@ -186,6 +186,8 @@ sonewconn(struct socket *head, int conns so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs; + klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so); + klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so); sigio_init(&so->so_sigio); sigio_copy(&so->so_sigio, &head->so_sigio); @@ -193,6 +195,8 @@ sonewconn(struct socket *head, int conns if ((*so->so_proto->pr_attach)(so, 0)) { (void) soqremque(so, soqueue); sigio_free(&so->so_sigio); + klist_free(&so->so_rcv.sb_sel.si_note); + klist_free(&so->so_snd.sb_sel.si_note); pool_put(&socket_pool, so); return (NULL); } Index: sys/miscfs/fifofs/fifo_vnops.c =================================================================== RCS file: src/sys/miscfs/fifofs/fifo_vnops.c,v retrieving revision 1.78 diff -u -p -r1.78 fifo_vnops.c --- sys/miscfs/fifofs/fifo_vnops.c 25 Dec 2020 12:59:52 -0000 1.78 +++ sys/miscfs/fifofs/fifo_vnops.c 5 Jan 2021 16:46:42 -0000 @@ -532,8 +532,7 @@ fifo_kqfilter(void *v) ap->a_kn->kn_hook = so; - klist_insert_locked(&sb->sb_sel.si_note, ap->a_kn); - sb->sb_flagsintr |= SB_KNOTE; + klist_insert(&sb->sb_sel.si_note, ap->a_kn); return (0); } @@ -543,9 +542,7 @@ filt_fifordetach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_hook; - klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn); - if (klist_empty(&so->so_rcv.sb_sel.si_note)) - so->so_rcv.sb_flagsintr &= ~SB_KNOTE; + klist_remove(&so->so_rcv.sb_sel.si_note, kn); } int @@ -579,9 +576,7 @@ filt_fifowdetach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_hook; - klist_remove_locked(&so->so_snd.sb_sel.si_note, kn); - if (klist_empty(&so->so_snd.sb_sel.si_note)) - so->so_snd.sb_flagsintr &= ~SB_KNOTE; + klist_remove(&so->so_snd.sb_sel.si_note, kn); } int Index: sys/sys/event.h =================================================================== RCS file: src/sys/sys/event.h,v retrieving revision 1.52 diff -u -p -r1.52 event.h --- sys/sys/event.h 25 Dec 2020 12:59:53 -0000 1.52 +++ sys/sys/event.h 5 Jan 2021 16:46:42 -0000 @@ -226,6 +226,7 @@ struct timespec; extern const struct filterops sig_filtops; extern const struct filterops dead_filtops; +extern const struct klistops socket_klistops; extern void kqpoll_init(void); extern void kqpoll_exit(void); Index: sys/sys/socketvar.h =================================================================== RCS file: src/sys/sys/socketvar.h,v retrieving revision 1.91 diff -u -p -r1.91 socketvar.h --- sys/sys/socketvar.h 15 Jan 2020 13:17:35 -0000 1.91 +++ sys/sys/socketvar.h 5 Jan 2021 16:46:42 -0000 @@ -113,7 +113,6 @@ struct socket { short sb_flags; /* flags, see below */ /* End area that is zeroed on flush. */ #define sb_endzero sb_flags - int sb_flagsintr; /* flags, changed atomically */ uint64_t sb_timeo_nsecs;/* timeout for read/write */ struct selinfo sb_sel; /* process selecting read/write */ } so_rcv, so_snd; @@ -125,7 +124,6 @@ struct socket { #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_SPLICE 0x20 /* buffer is splice source or drain */ #define SB_NOINTR 0x40 /* operations not interruptible */ -#define SB_KNOTE 0x80 /* kernel note attached */ void (*so_upcall)(struct socket *so, caddr_t arg, int waitf); caddr_t so_upcallarg; /* Arg for above */ @@ -177,11 +175,10 @@ void soassertlocked(struct socket *); static inline int sb_notify(struct socket *so, struct sockbuf *sb) { - int flags = (sb->sb_flags | sb->sb_flagsintr); - KASSERT(sb == &so->so_rcv || sb == &so->so_snd); soassertlocked(so); - return ((flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE|SB_KNOTE)) != 0); + return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE)) != 0 || + !klist_empty(&sb->sb_sel.si_note)); } /* Index: usr.bin/netstat/inet.c =================================================================== RCS file: src/usr.bin/netstat/inet.c,v retrieving revision 1.169 diff -u -p -r1.169 inet.c --- usr.bin/netstat/inet.c 23 Dec 2020 22:20:18 -0000 1.169 +++ usr.bin/netstat/inet.c 5 Jan 2021 16:46:42 -0000 @@ -1378,7 +1378,6 @@ sockbuf_dump(struct sockbuf *sb, const c p("%lu", sb_mbmax, ", "); p("%ld", sb_lowat, "\n "); printf("%s ", name); - p("%#.8x", sb_flagsintr, ", "); p("%#.4x", sb_flags, ", "); p("%llu", sb_timeo_nsecs, "\n "); #undef p