> Date: Wed, 8 Feb 2023 14:17:14 +0300
> From: Vitaliy Makkoveev <[email protected]>
>
> On Tue, Feb 07, 2023 at 05:42:40PM +0300, Vitaliy Makkoveev wrote:
> >
> > > > Otherwise, if you are concerning about serialized `p_sigmask' and
> > > > P_SIGSUSPEND, dosigsuspend() should be left under kernel lock:
> > > >
> > > > if (sigmask) {
> > > > KERNEL_LOCK();
> > > > dosigsuspend(p, *sigmask &~ sigcantmask);
> > > > KERNEL_UNLOCK();
> > > > }
> > >
> > > This is a hack but would allow progress. Now I would prefer if
> > > dosigsuspend() is made safe so that the KERNEL_LOCK is not needed. Which
> > > would also allow sigsuspend to be called with NOLOCK.
> > >
> >
> > Makes sense, but sigsuspend(2) will not be unlocked together with
> > select(2), so dosigsuspend() could be left under kernel lock for a
> > while.
> >
>
> Now, I want to keep dosigsuspend() under the kernel lock. To me it's
> better to unlock it together with sigsuspend(2) to separate possible
> signal related fallout. This still makes sense because signal mask is
> the optional arg for pselect(2) only. Except the local event data
> conversion, poll(2) internals are the same as select(2) internals, it
> makes sense to unlock them both.
>
> This doesn't mean I dropped dosigsuspend(), I'm still waiting feedback
> for barriers.
Splitting it this way makes sense to me.
> Index: sys/kern/sys_generic.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_generic.c,v
> retrieving revision 1.151
> diff -u -p -r1.151 sys_generic.c
> --- sys/kern/sys_generic.c 27 Dec 2022 20:13:03 -0000 1.151
> +++ sys/kern/sys_generic.c 8 Feb 2023 10:56:08 -0000
> @@ -596,15 +596,16 @@ dopselect(struct proc *p, int nd, fd_set
> struct timespec zerots = {};
> fd_mask bits[6];
> fd_set *pibits[3], *pobits[3];
> - int error, ncollected = 0, nevents = 0;
> + int error, nfiles, ncollected = 0, nevents = 0;
> u_int ni;
>
> if (nd < 0)
> return (EINVAL);
> - if (nd > p->p_fd->fd_nfiles) {
> - /* forgiving; slightly wrong */
> - nd = p->p_fd->fd_nfiles;
> - }
> +
> + nfiles = READ_ONCE(p->p_fd->fd_nfiles);
> + if (nd > nfiles)
> + nd = nfiles;
> +
> ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
> if (ni > sizeof(bits[0])) {
> caddr_t mbits;
> @@ -643,8 +644,11 @@ dopselect(struct proc *p, int nd, fd_set
> }
> #endif
>
> - if (sigmask)
> + if (sigmask) {
> + KERNEL_LOCK();
> dosigsuspend(p, *sigmask &~ sigcantmask);
> + KERNEL_UNLOCK();
> + }
>
> /* Register kqueue events */
> error = pselregister(p, pibits, pobits, nd, &nevents, &ncollected);
> @@ -948,8 +952,11 @@ doppoll(struct proc *p, struct pollfd *f
> if ((error = copyin(fds, pl, sz)) != 0)
> goto bad;
>
> - if (sigmask)
> + if (sigmask) {
> + KERNEL_LOCK();
> dosigsuspend(p, *sigmask &~ sigcantmask);
> + KERNEL_UNLOCK();
> + }
>
> /* Register kqueue events */
> ppollregister(p, pl, nfds, &nevents, &ncollected);
> Index: sys/kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.239
> diff -u -p -r1.239 syscalls.master
> --- sys/kern/syscalls.master 7 Jan 2023 05:24:58 -0000 1.239
> +++ sys/kern/syscalls.master 8 Feb 2023 10:56:10 -0000
> @@ -165,7 +165,7 @@
> struct itimerval *oitv); }
> 70 STD NOLOCK { int sys_getitimer(int which, \
> struct itimerval *itv); }
> -71 STD { int sys_select(int nd, fd_set *in, fd_set *ou, \
> +71 STD NOLOCK { int sys_select(int nd, fd_set *in, fd_set *ou, \
> fd_set *ex, struct timeval *tv); }
> 72 STD NOLOCK { int sys_kevent(int fd, \
> const struct kevent *changelist, int nchanges, \
> @@ -230,10 +230,10 @@
> u_int flags, int atflags); }
> 108 STD NOLOCK { int sys_pledge(const char *promises, \
> const char *execpromises); }
> -109 STD { int sys_ppoll(struct pollfd *fds, \
> +109 STD NOLOCK { int sys_ppoll(struct pollfd *fds, \
> u_int nfds, const struct timespec *ts, \
> const sigset_t *mask); }
> -110 STD { int sys_pselect(int nd, fd_set *in, fd_set *ou, \
> +110 STD NOLOCK { int sys_pselect(int nd, fd_set *in, fd_set *ou, \
> fd_set *ex, const struct timespec *ts, \
> const sigset_t *mask); }
> 111 STD { int sys_sigsuspend(int mask); }
> @@ -448,7 +448,7 @@
> 250 STD NOLOCK { int sys_minherit(void *addr, size_t len, \
> int inherit); }
> 251 OBSOL rfork
> -252 STD { int sys_poll(struct pollfd *fds, \
> +252 STD NOLOCK { int sys_poll(struct pollfd *fds, \
> u_int nfds, int timeout); }
> 253 STD NOLOCK { int sys_issetugid(void); }
> 254 STD { int sys_lchown(const char *path, uid_t uid, gid_t
> gid); }
>