On Tue, Feb 07, 2023 at 04:06:39PM +0100, Mark Kettenis wrote:
> > Date: Tue, 7 Feb 2023 14:29:13 +0100
> > From: Claudio Jeker <[email protected]>
> >
> > On Mon, Feb 06, 2023 at 08:28:38PM +0300, Vitaliy Makkoveev wrote:
> > > On Mon, Feb 06, 2023 at 11:01:00AM +0100, Claudio Jeker wrote:
> > > > On Sat, Feb 04, 2023 at 01:24:58AM +0300, Vitaliy Makkoveev wrote:
> > > > > Hi,
> > > > >
> > > > > kevent(2) system call is ulocked more than year ago. Since select(2)
> > > > > and pselect(2) are kqueue(2)/kevent(2) wrappers, it makes sense to
> > > > > unlock them too. select(2) does the temporary kernel event queue
> > > > > initialization for this call only and does queue scan with events
> > > > > conversion between kevent(2) and select(2) data formats.
> > > > >
> > > > > pselect(2) is the same, but it also sets provided signal mask.
> > > > > sigset_t
> > > > > is unsigned int and only current thread does `p_sigmask' and
> > > > > `p_oldmask'
> > > > > modification, so dosigsuspend() call outside kernel lock is safe.
> > > > > claudio@, is this true? If so, I want to mark `p_sigmask' and
> > > > > `p_oldmask' as atomic in proc structure description.
> > > >
> > > > p_sigmask is already marked atomic (and is accessed by other processes
> > > > concurrently in ptsignal()). p_oldmask is only accessed by curproc which
> > > > should probably be marked differently. Using atomic for a pure local
> > > > variable is wrong. Not sure if we have a special letter for this.
> > > >
> > >
> > > We have 'o' letter for this purpose, and we use it for `p_kq'
> > > `p_kq_serial', so it makes sense to mark `p_oldmask' too.
> >
> > Works for me.
> >
> > > > Now dosigsuspend() changes two states, p_flag and the p_sigmask.
> > > > p->p_oldmask = p->p_sigmask;
> > > > atomic_setbits_int(&p->p_flag, P_SIGSUSPEND);
> > > > p->p_sigmask = newmask;
> > > >
> > > > and ptsignal() does this:
> > > > TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
> > > > ...
> > > > /* skip threads that have the signal blocked */
> > > > if ((q->p_sigmask & mask) != 0)
> > > > continue;
> > > > ...
> > > > if (q->p_flag & P_SIGSUSPEND)
> > > > break;
> > > > }
> > > >
> > > > Since this would run concurrently with your change I wonder if the order
> > > > in dosigsupend() is the wrong way around. I think p->p_sigmask needs to
> > > > be
> > > > set first. Currently the order does not matter since both ptsignal()
> > > > and
> > > > dosigsuspend() use the KERNEL_LOCK.
> > > >
> > > > If we change the order do we need some additional memory barriers to
> > > > ensure the compiler/cpu reorder the store order? It this enough to solve
> > > > the data dependency between dosigsuspend() and ptsignal()?
> > > >
> > >
> > > I could be wrong, but membar_exit_before_atomic(9) between `p_sigmask'
> > > assignment and atomic P_SIGSUSPEND flag setting should be enough for
> > > that:
> > >
> > > p->p_oldmask = p->p_sigmask;
> > > p->p_sigmask = newmask;
> > > membar_exit_before_atomic();
> > > atomic_setbits_int(&p->p_flag, P_SIGSUSPEND);
> > >
> >
> > Is this good enough? Do we need a barrier in ptsignal() as well to ensure
> > that the order of operation (p_sigmask check before P_SIGSUSPEND check)
> > remains in order?
>
> I don't think that is the right barrier. This should be a
> membar_producer() call. We don't have a "before-atomic" variant of
> that, but on amd64 it doesn't actually issue a barrier instruction
> anyway.
>
> And yes this means there needs to be a membar_consumer() on the other
> side. Between the P_SIGSUSPEND check and and reading p_sigmask. And
> the P_SIGSUSPEND check needs to come first.
>
I want to keep checks within foreach loop as is, so I used local `flags'
variable to load `p_flags'. Did I used membar_producer() in the right
place?
Index: sys/kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.304
diff -u -p -r1.304 kern_sig.c
--- sys/kern/kern_sig.c 31 Jan 2023 15:18:56 -0000 1.304
+++ sys/kern/kern_sig.c 7 Feb 2023 17:17:44 -0000
@@ -504,8 +504,9 @@ dosigsuspend(struct proc *p, sigset_t ne
KASSERT(p == curproc);
p->p_oldmask = p->p_sigmask;
- atomic_setbits_int(&p->p_flag, P_SIGSUSPEND);
p->p_sigmask = newmask;
+ membar_producer();
+ atomic_setbits_int(&p->p_flag, P_SIGSUSPEND);
}
/*
@@ -962,8 +963,14 @@ ptsignal(struct proc *p, int signum, enu
* main thread.
*/
TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
+ int flags;
+
+ flags = q->p_flag;
+
+ membar_consumer();
+
/* ignore exiting threads */
- if (q->p_flag & P_WEXIT)
+ if (flags & P_WEXIT)
continue;
/* skip threads that have the signal blocked */
@@ -978,7 +985,7 @@ ptsignal(struct proc *p, int signum, enu
* Definitely go to this thread, as it's
* already blocked in the kernel.
*/
- if (q->p_flag & P_SIGSUSPEND)
+ if (flags & P_SIGSUSPEND)
break;
}
}
Index: sys/kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.151
diff -u -p -r1.151 sys_generic.c
--- sys/kern/sys_generic.c 27 Dec 2022 20:13:03 -0000 1.151
+++ sys/kern/sys_generic.c 7 Feb 2023 17:17:44 -0000
@@ -596,15 +596,16 @@ dopselect(struct proc *p, int nd, fd_set
struct timespec zerots = {};
fd_mask bits[6];
fd_set *pibits[3], *pobits[3];
- int error, ncollected = 0, nevents = 0;
+ int error, nfiles, ncollected = 0, nevents = 0;
u_int ni;
if (nd < 0)
return (EINVAL);
- if (nd > p->p_fd->fd_nfiles) {
- /* forgiving; slightly wrong */
- nd = p->p_fd->fd_nfiles;
- }
+
+ nfiles = READ_ONCE(p->p_fd->fd_nfiles);
+ if (nd > nfiles)
+ nd = nfiles;
+
ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
if (ni > sizeof(bits[0])) {
caddr_t mbits;
Index: sys/kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.239
diff -u -p -r1.239 syscalls.master
--- sys/kern/syscalls.master 7 Jan 2023 05:24:58 -0000 1.239
+++ sys/kern/syscalls.master 7 Feb 2023 17:17:44 -0000
@@ -165,7 +165,7 @@
struct itimerval *oitv); }
70 STD NOLOCK { int sys_getitimer(int which, \
struct itimerval *itv); }
-71 STD { int sys_select(int nd, fd_set *in, fd_set *ou, \
+71 STD NOLOCK { int sys_select(int nd, fd_set *in, fd_set *ou, \
fd_set *ex, struct timeval *tv); }
72 STD NOLOCK { int sys_kevent(int fd, \
const struct kevent *changelist, int nchanges, \
@@ -233,7 +233,7 @@
109 STD { int sys_ppoll(struct pollfd *fds, \
u_int nfds, const struct timespec *ts, \
const sigset_t *mask); }
-110 STD { int sys_pselect(int nd, fd_set *in, fd_set *ou, \
+110 STD NOLOCK { int sys_pselect(int nd, fd_set *in, fd_set *ou, \
fd_set *ex, const struct timespec *ts, \
const sigset_t *mask); }
111 STD { int sys_sigsuspend(int mask); }
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.339
diff -u -p -r1.339 proc.h
--- sys/sys/proc.h 16 Jan 2023 07:09:11 -0000 1.339
+++ sys/sys/proc.h 7 Feb 2023 17:17:45 -0000
@@ -386,7 +386,7 @@ struct proc {
struct user *p_addr; /* Kernel virtual addr of u-area */
struct mdproc p_md; /* Any machine-dependent fields. */
- sigset_t p_oldmask; /* Saved mask from before sigpause */
+ sigset_t p_oldmask; /* [o] Saved mask from before sigpause */
int p_sisig; /* For core dump/debugger XXX */
union sigval p_sigval; /* For core dump/debugger XXX */
long p_sitrapno; /* For core dump/debugger XXX */