This diff sets klist lock for sockets. Since sockets use custom lock
functions, the diff introduces socket_klistops for use with the klist
interface.

In soo_kqfilter(), the diff adds socket locking for accessing
so_options and changing the klist.

The patch replaces the use of the SB_KNOTE flag with a klist_empty()
predicate in sb_notify(). The flag is just a roundabout way of
determining whether the klist has elements. The removal of the flag
makes sb_flagsintr redundant.

As a result of this klist change, it becomes more explicit that the
socket has to be locked when calling selwakeup() on sb_sel. If the lock
is not held, the lock assert in knote() will trigger. The socket-related
f_event routines already expect the lock when NOTE_SUBMIT is set in
the hints parameter.

OK?

Index: sys/kern/uipc_socket.c
===================================================================
RCS file: src/sys/kern/uipc_socket.c,v
retrieving revision 1.252
diff -u -p -r1.252 uipc_socket.c
--- sys/kern/uipc_socket.c      25 Dec 2020 12:59:52 -0000      1.252
+++ sys/kern/uipc_socket.c      5 Jan 2021 16:46:42 -0000
@@ -150,6 +150,8 @@ socreate(int dom, struct socket **aso, i
        if (prp->pr_type != type)
                return (EPROTOTYPE);
        so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
+       klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
+       klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
        sigio_init(&so->so_sigio);
        TAILQ_INIT(&so->so_q0);
        TAILQ_INIT(&so->so_q);
@@ -239,6 +241,8 @@ sofree(struct socket *so, int s)
                }
        }
        sigio_free(&so->so_sigio);
+       klist_free(&so->so_rcv.sb_sel.si_note);
+       klist_free(&so->so_snd.sb_sel.si_note);
 #ifdef SOCKET_SPLICE
        if (so->so_sp) {
                if (issplicedback(so)) {
@@ -2017,9 +2021,9 @@ soo_kqfilter(struct file *fp, struct kno
 {
        struct socket *so = kn->kn_fp->f_data;
        struct sockbuf *sb;
+       int s;
 
-       KERNEL_ASSERT_LOCKED();
-
+       s = solock(so);
        switch (kn->kn_filter) {
        case EVFILT_READ:
                if (so->so_options & SO_ACCEPTCONN)
@@ -2037,11 +2041,12 @@ soo_kqfilter(struct file *fp, struct kno
                sb = &so->so_rcv;
                break;
        default:
+               sounlock(so, s);
                return (EINVAL);
        }
 
        klist_insert_locked(&sb->sb_sel.si_note, kn);
-       sb->sb_flagsintr |= SB_KNOTE;
+       sounlock(so, s);
 
        return (0);
 }
@@ -2051,11 +2056,7 @@ filt_sordetach(struct knote *kn)
 {
        struct socket *so = kn->kn_fp->f_data;
 
-       KERNEL_ASSERT_LOCKED();
-
-       klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
-       if (klist_empty(&so->so_rcv.sb_sel.si_note))
-               so->so_rcv.sb_flagsintr &= ~SB_KNOTE;
+       klist_remove(&so->so_rcv.sb_sel.si_note, kn);
 }
 
 int
@@ -2104,11 +2105,7 @@ filt_sowdetach(struct knote *kn)
 {
        struct socket *so = kn->kn_fp->f_data;
 
-       KERNEL_ASSERT_LOCKED();
-
-       klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
-       if (klist_empty(&so->so_snd.sb_sel.si_note))
-               so->so_snd.sb_flagsintr &= ~SB_KNOTE;
+       klist_remove(&so->so_snd.sb_sel.si_note, kn);
 }
 
 int
@@ -2159,6 +2156,36 @@ filt_solisten(struct knote *kn, long hin
        return (kn->kn_data != 0);
 }
 
+static void
+klist_soassertlk(void *arg)
+{
+       struct socket *so = arg;
+
+       soassertlocked(so);
+}
+
+static int
+klist_solock(void *arg)
+{
+       struct socket *so = arg;
+
+       return (solock(so));
+}
+
+static void
+klist_sounlock(void *arg, int ls)
+{
+       struct socket *so = arg;
+
+       sounlock(so, ls);
+}
+
+const struct klistops socket_klistops = {
+       .klo_assertlk   = klist_soassertlk,
+       .klo_lock       = klist_solock,
+       .klo_unlock     = klist_sounlock,
+};
+
 #ifdef DDB
 void
 sobuf_print(struct sockbuf *,
@@ -2179,7 +2206,6 @@ sobuf_print(struct sockbuf *sb,
        (*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
        (*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
        (*pr)("\tsb_sel: ...\n");
-       (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
        (*pr)("\tsb_flags: %i\n", sb->sb_flags);
        (*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs);
 }
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: src/sys/kern/uipc_socket2.c,v
retrieving revision 1.104
diff -u -p -r1.104 uipc_socket2.c
--- sys/kern/uipc_socket2.c     11 Apr 2020 14:07:06 -0000      1.104
+++ sys/kern/uipc_socket2.c     5 Jan 2021 16:46:42 -0000
@@ -186,6 +186,8 @@ sonewconn(struct socket *head, int conns
        so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
        so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs;
 
+       klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
+       klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
        sigio_init(&so->so_sigio);
        sigio_copy(&so->so_sigio, &head->so_sigio);
 
@@ -193,6 +195,8 @@ sonewconn(struct socket *head, int conns
        if ((*so->so_proto->pr_attach)(so, 0)) {
                (void) soqremque(so, soqueue);
                sigio_free(&so->so_sigio);
+               klist_free(&so->so_rcv.sb_sel.si_note);
+               klist_free(&so->so_snd.sb_sel.si_note);
                pool_put(&socket_pool, so);
                return (NULL);
        }
Index: sys/miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.78
diff -u -p -r1.78 fifo_vnops.c
--- sys/miscfs/fifofs/fifo_vnops.c      25 Dec 2020 12:59:52 -0000      1.78
+++ sys/miscfs/fifofs/fifo_vnops.c      5 Jan 2021 16:46:42 -0000
@@ -532,8 +532,7 @@ fifo_kqfilter(void *v)
 
        ap->a_kn->kn_hook = so;
 
-       klist_insert_locked(&sb->sb_sel.si_note, ap->a_kn);
-       sb->sb_flagsintr |= SB_KNOTE;
+       klist_insert(&sb->sb_sel.si_note, ap->a_kn);
 
        return (0);
 }
@@ -543,9 +542,7 @@ filt_fifordetach(struct knote *kn)
 {
        struct socket *so = (struct socket *)kn->kn_hook;
 
-       klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
-       if (klist_empty(&so->so_rcv.sb_sel.si_note))
-               so->so_rcv.sb_flagsintr &= ~SB_KNOTE;
+       klist_remove(&so->so_rcv.sb_sel.si_note, kn);
 }
 
 int
@@ -579,9 +576,7 @@ filt_fifowdetach(struct knote *kn)
 {
        struct socket *so = (struct socket *)kn->kn_hook;
 
-       klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
-       if (klist_empty(&so->so_snd.sb_sel.si_note))
-               so->so_snd.sb_flagsintr &= ~SB_KNOTE;
+       klist_remove(&so->so_snd.sb_sel.si_note, kn);
 }
 
 int
Index: sys/sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.52
diff -u -p -r1.52 event.h
--- sys/sys/event.h     25 Dec 2020 12:59:53 -0000      1.52
+++ sys/sys/event.h     5 Jan 2021 16:46:42 -0000
@@ -226,6 +226,7 @@ struct timespec;
 
 extern const struct filterops sig_filtops;
 extern const struct filterops dead_filtops;
+extern const struct klistops socket_klistops;
 
 extern void    kqpoll_init(void);
 extern void    kqpoll_exit(void);
Index: sys/sys/socketvar.h
===================================================================
RCS file: src/sys/sys/socketvar.h,v
retrieving revision 1.91
diff -u -p -r1.91 socketvar.h
--- sys/sys/socketvar.h 15 Jan 2020 13:17:35 -0000      1.91
+++ sys/sys/socketvar.h 5 Jan 2021 16:46:42 -0000
@@ -113,7 +113,6 @@ struct socket {
                short   sb_flags;       /* flags, see below */
 /* End area that is zeroed on flush. */
 #define        sb_endzero      sb_flags
-               int     sb_flagsintr;   /* flags, changed atomically */
                uint64_t sb_timeo_nsecs;/* timeout for read/write */
                struct  selinfo sb_sel; /* process selecting read/write */
        } so_rcv, so_snd;
@@ -125,7 +124,6 @@ struct socket {
 #define        SB_ASYNC        0x10            /* ASYNC I/O, need signals */
 #define        SB_SPLICE       0x20            /* buffer is splice source or 
drain */
 #define        SB_NOINTR       0x40            /* operations not interruptible 
*/
-#define        SB_KNOTE        0x80            /* kernel note attached */
 
        void    (*so_upcall)(struct socket *so, caddr_t arg, int waitf);
        caddr_t so_upcallarg;           /* Arg for above */
@@ -177,11 +175,10 @@ void      soassertlocked(struct socket *);
 static inline int
 sb_notify(struct socket *so, struct sockbuf *sb)
 {
-       int flags = (sb->sb_flags | sb->sb_flagsintr);
-
        KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
        soassertlocked(so);
-       return ((flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE|SB_KNOTE)) != 0);
+       return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE)) != 0 ||
+           !klist_empty(&sb->sb_sel.si_note));
 }
 
 /*
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: src/usr.bin/netstat/inet.c,v
retrieving revision 1.169
diff -u -p -r1.169 inet.c
--- usr.bin/netstat/inet.c      23 Dec 2020 22:20:18 -0000      1.169
+++ usr.bin/netstat/inet.c      5 Jan 2021 16:46:42 -0000
@@ -1378,7 +1378,6 @@ sockbuf_dump(struct sockbuf *sb, const c
        p("%lu", sb_mbmax, ", ");
        p("%ld", sb_lowat, "\n ");
        printf("%s ", name);
-       p("%#.8x", sb_flagsintr, ", ");
        p("%#.4x", sb_flags, ", ");
        p("%llu", sb_timeo_nsecs, "\n ");
 #undef p

Reply via email to