On Thu, May 20, 2021 at 11:35:32AM +0200, Martin Pieuchot wrote: > On 18/05/21(Tue) 14:22, Visa Hankala wrote: > > This diff adds f_modify and f_process callbacks to socket event filters. > > As a result, socket events are handled using the non-legacy paths in > > filter_modify() and filter_process() of kern_event.c This a step toward > > MP-safety. However, everything still runs under the kernel lock. > > > > The change has three intended effects: > > > > * Socket events are handled without raising the system priority level. > > This makes the activity observable with btrace(8). > > > > * kqueue itself no longer calls f_event of socket filterops, which > > allows replacing the conditional, NOTE_SUBMIT-based locking with > > a fixed call pattern. > > I love this. > > > * The state of a socket event is now always rechecked before delivery > > to user. Before, the recheck was skipped if the event was registered > > with EV_ONESHOT. > > To me this sounds sane. I can't think of a way to rely on the current > behavior. However if there's an easy way to split these changes in two > commits, I'd prefer to stay on the safe side.
Below is an updated diff that preserves the current EV_ONESHOT behaviour. I have just adapted a part of the compatibility logic from function filter_process(). When f_process is given a non-NULL kev argument, it is known that the callback is invoked from kqueue_scan(). If kev is NULL, kqueue_register() is checking if the knote should be activated and there is no intent to deliver the event right now. Index: kern/uipc_socket.c =================================================================== RCS file: src/sys/kern/uipc_socket.c,v retrieving revision 1.261 diff -u -p -r1.261 uipc_socket.c --- kern/uipc_socket.c 13 May 2021 19:43:11 -0000 1.261 +++ kern/uipc_socket.c 20 May 2021 14:01:18 -0000 @@ -70,15 +70,26 @@ void sorflush(struct socket *); void filt_sordetach(struct knote *kn); int filt_soread(struct knote *kn, long hint); +int filt_soreadmodify(struct kevent *kev, struct knote *kn); +int filt_soreadprocess(struct knote *kn, struct kevent *kev); +int filt_soread_common(struct knote *kn, struct socket *so); void filt_sowdetach(struct knote *kn); int filt_sowrite(struct knote *kn, long hint); +int filt_sowritemodify(struct kevent *kev, struct knote *kn); +int filt_sowriteprocess(struct knote *kn, struct kevent *kev); +int filt_sowrite_common(struct knote *kn, struct socket *so); int filt_solisten(struct knote *kn, long hint); +int filt_solistenmodify(struct kevent *kev, struct knote *kn); +int filt_solistenprocess(struct knote *kn, struct kevent *kev); +int filt_solisten_common(struct knote *kn, struct socket *so); const struct filterops solisten_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_solisten, + .f_modify = filt_solistenmodify, + .f_process = filt_solistenprocess, }; const struct filterops soread_filtops = { @@ -86,6 +97,8 @@ const struct filterops soread_filtops = .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, + .f_modify = filt_soreadmodify, + .f_process = filt_soreadprocess, }; const struct filterops sowrite_filtops = { @@ -93,6 +106,8 @@ const struct filterops sowrite_filtops = .f_attach = NULL, .f_detach = filt_sowdetach, .f_event = filt_sowrite, + .f_modify = filt_sowritemodify, + .f_process = filt_sowriteprocess, }; const struct filterops soexcept_filtops = { @@ -100,6 +115,8 @@ const struct filterops soexcept_filtops .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, + .f_modify = filt_soreadmodify, + .f_process = filt_soreadprocess, }; #ifndef SOMINCONN @@ -2056,13 +2073,12 @@ filt_sordetach(struct knote *kn) } int -filt_soread(struct knote *kn, long hint) +filt_soread_common(struct knote *kn, struct socket *so) { - struct socket *so = kn->kn_fp->f_data; - int s, rv = 0; + int rv = 0; + + soassertlocked(so); - if ((hint & NOTE_SUBMIT) == 0) - s = solock(so); kn->kn_data = so->so_rcv.sb_cc; #ifdef SOCKET_SPLICE if (isspliced(so)) { @@ -2090,12 +2106,50 @@ filt_soread(struct knote *kn, long hint) } else { rv = (kn->kn_data >= so->so_rcv.sb_lowat); } - if ((hint & NOTE_SUBMIT) == 0) - sounlock(so, s); return rv; } +int +filt_soread(struct knote *kn, long hint) +{ + struct socket *so = kn->kn_fp->f_data; + + return (filt_soread_common(kn, so)); +} + +int +filt_soreadmodify(struct kevent *kev, struct knote *kn) +{ + struct socket *so = kn->kn_fp->f_data; + int rv, s; + + s = solock(so); + knote_modify(kev, kn); + rv = filt_soread_common(kn, so); + sounlock(so, s); + + return (rv); +} + +int +filt_soreadprocess(struct knote *kn, struct kevent *kev) +{ + struct socket *so = kn->kn_fp->f_data; + int rv, s; + + s = solock(so); + if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) + rv = 1; + else + rv = filt_soread_common(kn, so); + if (rv != 0) + knote_submit(kn, kev); + sounlock(so, s); + + return (rv); +} + void filt_sowdetach(struct knote *kn) { @@ -2107,13 +2161,12 @@ filt_sowdetach(struct knote *kn) } int -filt_sowrite(struct knote *kn, long hint) +filt_sowrite_common(struct knote *kn, struct socket *so) { - struct socket *so = kn->kn_fp->f_data; - int s, rv; + int rv; + + soassertlocked(so); - if ((hint & NOTE_SUBMIT) == 0) - s = solock(so); kn->kn_data = sbspace(so, &so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; @@ -2133,27 +2186,100 @@ filt_sowrite(struct knote *kn, long hint } else { rv = (kn->kn_data >= so->so_snd.sb_lowat); } - if ((hint & NOTE_SUBMIT) == 0) - sounlock(so, s); return (rv); } int -filt_solisten(struct knote *kn, long hint) +filt_sowrite(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; - int s; - if ((hint & NOTE_SUBMIT) == 0) - s = solock(so); + return (filt_sowrite_common(kn, so)); +} + +int +filt_sowritemodify(struct kevent *kev, struct knote *kn) +{ + struct socket *so = kn->kn_fp->f_data; + int rv, s; + + s = solock(so); + knote_modify(kev, kn); + rv = filt_sowrite_common(kn, so); + sounlock(so, s); + + return (rv); +} + +int +filt_sowriteprocess(struct knote *kn, struct kevent *kev) +{ + struct socket *so = kn->kn_fp->f_data; + int rv, s; + + s = solock(so); + if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) + rv = 1; + else + rv = filt_sowrite_common(kn, so); + if (rv != 0) + knote_submit(kn, kev); + sounlock(so, s); + + return (rv); +} + +int +filt_solisten_common(struct knote *kn, struct socket *so) +{ + soassertlocked(so); + kn->kn_data = so->so_qlen; - if ((hint & NOTE_SUBMIT) == 0) - sounlock(so, s); return (kn->kn_data != 0); } +int +filt_solisten(struct knote *kn, long hint) +{ + struct socket *so = kn->kn_fp->f_data; + + return (filt_solisten_common(kn, so)); +} + +int +filt_solistenmodify(struct kevent *kev, struct knote *kn) +{ + struct socket *so = kn->kn_fp->f_data; + int rv, s; + + s = solock(so); + knote_modify(kev, kn); + rv = filt_solisten_common(kn, so); + sounlock(so, s); + + return (rv); +} + +int +filt_solistenprocess(struct knote *kn, struct kevent *kev) +{ + struct socket *so = kn->kn_fp->f_data; + int rv, s; + + s = solock(so); + if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) + rv = 1; + else + rv = filt_solisten_common(kn, so); + if (rv != 0) + knote_submit(kn, kev); + sounlock(so, s); + + return (rv); +} + #ifdef DDB void sobuf_print(struct sockbuf *, Index: kern/uipc_syscalls.c =================================================================== RCS file: src/sys/kern/uipc_syscalls.c,v retrieving revision 1.190 diff -u -p -r1.190 uipc_syscalls.c --- kern/uipc_syscalls.c 13 May 2021 17:31:59 -0000 1.190 +++ kern/uipc_syscalls.c 20 May 2021 14:01:18 -0000 @@ -308,7 +308,7 @@ doaccept(struct proc *p, int sock, struc : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0); /* connection has been removed from the listen queue */ - KNOTE(&head->so_rcv.sb_sel.si_note, NOTE_SUBMIT); + KNOTE(&head->so_rcv.sb_sel.si_note, 0); fp->f_type = DTYPE_SOCKET; fp->f_flag = FREAD | FWRITE | nflag;