On Thu, Apr 27, 2023 at 02:54:38PM +0200, Claudio Jeker wrote:
> On Thu, Apr 27, 2023 at 01:55:33PM +0300, Vitaliy Makkoveev wrote:
> > Now only direct netlock used for inet sockets protection. The unlocked
> > access to all other sockets is safe, but we could lost consistency for a
> > little. Since the solock() used for sockets protection, make locking
> > path common and use it. Make it shared because this is read-only access
> > to sockets data. For same reason use shared netlock while performing
> > inpcb tables foreach walkthrough.
>
> This is still wrong. fill_file() is not allowed to sleep. So you can not
> use a rwlock in here. fill_file is called inside a allprocess
> LIST_FOREACH loop and sleeping there will blow up.
>
> Maybe it is enough to grab a shared NETLOCK around the LIST_FOREACH() to
> ensure that we don't sleep inside.
>
> Please fix this properly. Right now running fstat is like playing russian
> roulette.
>
Except __pmap_asid_alloc() from arch/sh/sh/pmap.c of uniprocessor
machine, proc_stop_sweep() is the only place where `ps_list' can't be
protected by `allprocess_lock' rwlock(9). Since the refactoring is not
obvious, for now I propose to use two locks for `ps_list' as we do for
some other data structures like `ps_threads' or `if_list'. For the code
paths where context switch is unwanted, the read only acces to `ps_list'
will be protected by kernel lock, and by `allprocess_lock' rwlock(9) in
all the rest. The `ps_list' list modification will require both locks to
be held. Not the best solution, but it fixes sysctl_file().
Index: sys/kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.211
diff -u -p -r1.211 kern_exit.c
--- sys/kern/kern_exit.c 25 Apr 2023 18:14:06 -0000 1.211
+++ sys/kern/kern_exit.c 16 May 2023 11:14:49 -0000
@@ -223,6 +223,8 @@ exit1(struct proc *p, int xexit, int xsi
p->p_fd = NULL; /* zap the thread's copy */
+ rw_enter_write(&allprocess_lock);
+
/*
* Remove proc from pidhash chain and allproc so looking
* it up won't work. We will put the proc on the
@@ -295,6 +297,8 @@ exit1(struct proc *p, int xexit, int xsi
process_clear_orphan(qr);
}
}
+
+ rw_exit_write(&allprocess_lock);
/* add thread's accumulated rusage into the process's total */
ruadd(rup, &p->p_ru);
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.247
diff -u -p -r1.247 kern_fork.c
--- sys/kern/kern_fork.c 25 Apr 2023 18:14:06 -0000 1.247
+++ sys/kern/kern_fork.c 16 May 2023 11:14:49 -0000
@@ -62,6 +62,11 @@
#include <uvm/uvm.h>
#include <machine/tcb.h>
+/*
+ * Locks used to protect struct members in this file:
+ * P allprocess_lock
+ */
+
int nprocesses = 1; /* process 0 */
int nthreads = 1; /* proc 0 */
struct forkstat forkstat;
@@ -372,6 +377,8 @@ fork1(struct proc *curp, int flags, void
return (ENOMEM);
}
+ rw_enter_write(&allprocess_lock);
+
/*
* From now on, we're committed to the fork and cannot fail.
*/
@@ -468,19 +475,28 @@ fork1(struct proc *curp, int flags, void
knote_locked(&curpr->ps_klist, NOTE_FORK | pr->ps_pid);
/*
- * Update stats now that we know the fork was successful.
+ * Return child pid to parent process
*/
- uvmexp.forks++;
- if (flags & FORK_PPWAIT)
- uvmexp.forks_ppwait++;
- if (flags & FORK_SHAREVM)
- uvmexp.forks_sharevm++;
+ if (retval != NULL)
+ *retval = pr->ps_pid;
/*
* Pass a pointer to the new process to the caller.
+ * XXX but we don't return `rnewprocp' to sys_fork()
+ * or sys_vfork().
*/
if (rnewprocp != NULL)
*rnewprocp = p;
+ rw_exit_write(&allprocess_lock);
+
+ /*
+ * Update stats now that we know the fork was successful.
+ */
+ uvmexp.forks++;
+ if (flags & FORK_PPWAIT)
+ uvmexp.forks_ppwait++;
+ if (flags & FORK_SHAREVM)
+ uvmexp.forks_sharevm++;
/*
* Preserve synchronization semantics of vfork. If waiting for
@@ -499,11 +515,6 @@ fork1(struct proc *curp, int flags, void
if ((flags & FORK_PTRACE) && (curpr->ps_flags & PS_TRACED))
psignal(curp, SIGTRAP);
- /*
- * Return child pid to parent process
- */
- if (retval != NULL)
- *retval = pr->ps_pid;
return (0);
}
@@ -610,7 +621,7 @@ alloctid(void)
/*
* Checks for current use of a pid, either as a pid or pgid.
*/
-pid_t oldpids[128];
+pid_t oldpids[128]; /* [P] */
int
ispidtaken(pid_t pid)
{
Index: sys/kern/kern_ktrace.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_ktrace.c,v
retrieving revision 1.112
diff -u -p -r1.112 kern_ktrace.c
--- sys/kern/kern_ktrace.c 11 May 2023 09:51:33 -0000 1.112
+++ sys/kern/kern_ktrace.c 16 May 2023 11:14:49 -0000
@@ -434,6 +434,7 @@ doktrace(struct vnode *vp, int ops, int
* Clear all uses of the tracefile
*/
if (ops == KTROP_CLEARFILE) {
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
if (pr->ps_tracevp == vp) {
if (ktrcanset(p, pr))
@@ -442,6 +443,7 @@ doktrace(struct vnode *vp, int ops, int
error = EPERM;
}
}
+ rw_exit_read(&allprocess_lock);
goto done;
}
/*
@@ -674,12 +676,14 @@ bad:
*/
log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
error);
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
if (pr == curp->p_p)
continue;
if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
ktrcleartrace(pr);
}
+ rw_exit_read(&allprocess_lock);
ktrcleartrace(curp->p_p);
return (error);
}
Index: sys/kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.94
diff -u -p -r1.94 kern_proc.c
--- sys/kern/kern_proc.c 2 Jan 2023 23:09:48 -0000 1.94
+++ sys/kern/kern_proc.c 16 May 2023 11:14:49 -0000
@@ -46,6 +46,8 @@
/*
* Locks used to protect struct members in this file:
* I immutable after creation
+ * K kernel lock
+ * P allprocess_lock
* U uidinfolk
*/
@@ -54,6 +56,8 @@ struct rwlock uidinfolk;
LIST_HEAD(uihashhead, uidinfo) *uihashtbl; /* [U] */
u_long uihash; /* [I] size of hash table - 1 */
+struct rwlock allprocess_lock;
+
/*
* Other process lists
*/
@@ -63,7 +67,7 @@ struct pidhashhead *pidhashtbl;
u_long pidhash;
struct pgrphashhead *pgrphashtbl;
u_long pgrphash;
-struct processlist allprocess;
+struct processlist allprocess; /* [K|P] */
struct processlist zombprocess;
struct proclist allproc;
@@ -92,6 +96,7 @@ procinit(void)
LIST_INIT(&zombprocess);
LIST_INIT(&allproc);
+ rw_init(&allprocess_lock, "allpslk");
rw_init(&uidinfolk, "uidinfo");
tidhashtbl = hashinit(maxthread / 4, M_PROC, M_NOWAIT, &tidhash);
Index: sys/kern/kern_resource.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_resource.c,v
retrieving revision 1.77
diff -u -p -r1.77 kern_resource.c
--- sys/kern/kern_resource.c 4 Feb 2023 19:33:03 -0000 1.77
+++ sys/kern/kern_resource.c 16 May 2023 11:14:49 -0000
@@ -122,10 +122,12 @@ sys_getpriority(struct proc *curp, void
case PRIO_USER:
if (SCARG(uap, who) == 0)
SCARG(uap, who) = curp->p_ucred->cr_uid;
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list)
if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
pr->ps_nice < low)
low = pr->ps_nice;
+ rw_exit_read(&allprocess_lock);
break;
default:
@@ -178,11 +180,13 @@ sys_setpriority(struct proc *curp, void
case PRIO_USER:
if (SCARG(uap, who) == 0)
SCARG(uap, who) = curp->p_ucred->cr_uid;
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list)
if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
error = donice(curp, pr, SCARG(uap, prio));
found = 1;
}
+ rw_exit_read(&allprocess_lock);
break;
default:
Index: sys/kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.306
diff -u -p -r1.306 kern_sig.c
--- sys/kern/kern_sig.c 3 Apr 2023 11:57:50 -0000 1.306
+++ sys/kern/kern_sig.c 16 May 2023 11:14:49 -0000
@@ -670,6 +670,7 @@ killpg1(struct proc *cp, int signum, int
/*
* broadcast
*/
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
if (pr->ps_pid <= 1 ||
pr->ps_flags & (PS_SYSTEM | PS_NOBROADCASTKILL) ||
@@ -679,6 +680,7 @@ killpg1(struct proc *cp, int signum, int
if (signum)
prsignal(pr, signum);
}
+ rw_exit_read(&allprocess_lock);
} else {
if (pgid == 0)
/*
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.412
diff -u -p -r1.412 kern_sysctl.c
--- sys/kern/kern_sysctl.c 4 May 2023 09:40:36 -0000 1.412
+++ sys/kern/kern_sysctl.c 16 May 2023 11:14:49 -0000
@@ -1426,6 +1426,7 @@ sysctl_file(int *name, u_int namelen, ch
break;
}
matched = 0;
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
/*
* skip system, exiting, embryonic and undead
@@ -1454,10 +1455,12 @@ sysctl_file(int *name, u_int namelen, ch
FRELE(fp, p);
}
}
+ rw_exit_read(&allprocess_lock);
if (!matched)
error = ESRCH;
break;
case KERN_FILE_BYUID:
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
/*
* skip system, exiting, embryonic and undead
@@ -1483,6 +1486,7 @@ sysctl_file(int *name, u_int namelen, ch
FRELE(fp, p);
}
}
+ rw_exit_read(&allprocess_lock);
break;
default:
error = EINVAL;
@@ -1538,8 +1542,9 @@ sysctl_doproc(int *name, u_int namelen,
if (where != NULL)
kproc = malloc(sizeof(*kproc), M_TEMP, M_WAITOK);
- pr = LIST_FIRST(&allprocess);
doingzomb = 0;
+ rw_enter_read(&allprocess_lock);
+ pr = LIST_FIRST(&allprocess);
again:
for (; pr != NULL; pr = LIST_NEXT(pr, ps_list)) {
/* XXX skip processes in the middle of being zapped */
@@ -1602,6 +1607,7 @@ again:
break;
default:
+ rw_exit_read(&allprocess_lock);
error = EINVAL;
goto err;
}
@@ -1609,8 +1615,10 @@ again:
if (buflen >= elem_size && elem_count > 0) {
fill_kproc(pr, kproc, NULL, show_pointers);
error = copyout(kproc, dp, elem_size);
- if (error)
+ if (error) {
+ rw_exit_read(&allprocess_lock);
goto err;
+ }
dp += elem_size;
buflen -= elem_size;
elem_count--;
@@ -1625,8 +1633,10 @@ again:
if (buflen >= elem_size && elem_count > 0) {
fill_kproc(pr, kproc, p, show_pointers);
error = copyout(kproc, dp, elem_size);
- if (error)
+ if (error) {
+ rw_exit_read(&allprocess_lock);
goto err;
+ }
dp += elem_size;
buflen -= elem_size;
elem_count--;
@@ -1639,6 +1649,8 @@ again:
doingzomb++;
goto again;
}
+ rw_exit_read(&allprocess_lock);
+
if (where != NULL) {
*sizep = dp - where;
if (needed > *sizep) {
Index: sys/kern/kern_unveil.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_unveil.c,v
retrieving revision 1.55
diff -u -p -r1.55 kern_unveil.c
--- sys/kern/kern_unveil.c 5 Dec 2022 23:18:37 -0000 1.55
+++ sys/kern/kern_unveil.c 16 May 2023 11:14:49 -0000
@@ -823,6 +823,7 @@ unveil_removevnode(struct vnode *vp)
vref(vp); /* make sure it is held till we are done */
+ rw_assert_rdlock(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
struct unveil * uv;
Index: sys/kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.361
diff -u -p -r1.361 vfs_syscalls.c
--- sys/kern/vfs_syscalls.c 11 Feb 2023 23:22:17 -0000 1.361
+++ sys/kern/vfs_syscalls.c 16 May 2023 11:14:49 -0000
@@ -325,6 +325,7 @@ checkdirs(struct vnode *olddp)
return;
if (VFS_ROOT(olddp->v_mountedhere, &newdp))
panic("mount: lost mount");
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
fdp = pr->ps_fd;
if (fdp->fd_cdir == olddp) {
@@ -338,6 +339,7 @@ checkdirs(struct vnode *olddp)
fdp->fd_rdir = newdp;
}
}
+ rw_exit_read(&allprocess_lock);
if (rootvnode == olddp) {
free_count++;
vref(newdp);
@@ -483,12 +485,20 @@ dounmount_leaf(struct mount *mp, int fla
}
/*
+ * Protect `allprocess' list access within
+ * unveil_removevnode(). Do this here to avoid
+ * context switch within `mnt_vnodelist' foreach
+ * loop.
+ */
+ rw_enter_read(&allprocess_lock);
+ /*
* Before calling file system unmount, make sure
* all unveils to vnodes in here are dropped.
*/
TAILQ_FOREACH_SAFE(vp , &mp->mnt_vnodelist, v_mntvnodes, nvp) {
unveil_removevnode(vp);
}
+ rw_exit_read(&allprocess_lock);
if (((mp->mnt_flag & MNT_RDONLY) ||
(error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) ||
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.340
diff -u -p -r1.340 proc.h
--- sys/sys/proc.h 25 Apr 2023 18:14:06 -0000 1.340
+++ sys/sys/proc.h 16 May 2023 11:14:49 -0000
@@ -124,6 +124,7 @@ struct unveil;
* K kernel lock
* m this process' `ps_mtx'
* p this process' `ps_lock'
+ * P allprocess_lock
* R rlimit_lock
* S scheduler lock
* T itimer_mtx
@@ -137,7 +138,7 @@ struct process {
struct proc *ps_mainproc;
struct ucred *ps_ucred; /* Process owner's identity. */
- LIST_ENTRY(process) ps_list; /* List of all processes. */
+ LIST_ENTRY(process) ps_list; /* [K|P] List of all processes. */
TAILQ_HEAD(,proc) ps_threads; /* [K|S] Threads in this process. */
LIST_ENTRY(process) ps_pglist; /* List of processes in pgrp. */
@@ -502,6 +503,8 @@ extern struct proc proc0; /* Process sl
extern struct process process0; /* Process slot for kernel
threads. */
extern int nprocesses, maxprocess; /* Cur and max number of processes. */
extern int nthreads, maxthread; /* Cur and max number of
threads. */
+
+extern struct rwlock allprocess_lock;
LIST_HEAD(proclist, proc);
LIST_HEAD(processlist, process);