At least network subset of sysctl(8) MIBs relies on netlock or another
locks and doesn't require kernel lock. Also some integers in other
subsets can be read without kernel lock held.
Diff below actually pushes kernel lock down to net_sysctl(). It is
required for MPLS and PFLOW cases. The protocol *_sysctls() are left
under kernel lock to make the review of this diff easier. I want to
sequentially unlock them later. The rest of cases are easy to review.
net_lisnk_sysctl() only calls net_ifq_sysctl() which only returns error.
unp_sysctl() does access to integer variables, which are already
accessed in uipc_attach() without kernel lock held. pipex_sysctl() also
accesses integer variable which is lockless accessed in the
pppx(4)/pipex(4) layer.
ok?
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.411
diff -u -p -r1.411 kern_sysctl.c
--- sys/kern/kern_sysctl.c 22 Jan 2023 12:05:44 -0000 1.411
+++ sys/kern/kern_sysctl.c 21 Apr 2023 13:48:13 -0000
@@ -168,7 +168,7 @@ sys_sysctl(struct proc *p, void *v, regi
syscallarg(void *) new;
syscallarg(size_t) newlen;
} */ *uap = v;
- int error, dolock = 1;
+ int error, dokernellock = 1, dolock = 1;
size_t savelen = 0, oldlen = 0;
sysctlfn *fn;
int name[CTL_MAXNAME];
@@ -203,6 +203,7 @@ sys_sysctl(struct proc *p, void *v, regi
break;
case CTL_NET:
fn = net_sysctl;
+ dokernellock = 0;
break;
case CTL_FS:
fn = fs_sysctl;
@@ -230,19 +231,22 @@ sys_sysctl(struct proc *p, void *v, regi
if (SCARG(uap, oldlenp) &&
(error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
return (error);
+ if (dokernellock)
+ KERNEL_LOCK();
if (SCARG(uap, old) != NULL) {
if ((error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR)) != 0)
- return (error);
+ goto unlock;
if (dolock) {
if (atop(oldlen) > uvmexp.wiredmax - uvmexp.wired) {
rw_exit_write(&sysctl_lock);
- return (ENOMEM);
+ error = ENOMEM;
+ goto unlock;
}
error = uvm_vslock(p, SCARG(uap, old), oldlen,
PROT_READ | PROT_WRITE);
if (error) {
rw_exit_write(&sysctl_lock);
- return (error);
+ goto unlock;
}
}
savelen = oldlen;
@@ -254,6 +258,9 @@ sys_sysctl(struct proc *p, void *v, regi
uvm_vsunlock(p, SCARG(uap, old), savelen);
rw_exit_write(&sysctl_lock);
}
+unlock:
+ if (dokernellock)
+ KERNEL_UNLOCK();
if (error)
return (error);
if (SCARG(uap, oldlenp))
Index: sys/kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.246
diff -u -p -r1.246 syscalls.master
--- sys/kern/syscalls.master 25 Feb 2023 09:55:46 -0000 1.246
+++ sys/kern/syscalls.master 21 Apr 2023 13:48:13 -0000
@@ -361,7 +361,7 @@
199 OBSOL pad_lseek
200 OBSOL pad_truncate
201 OBSOL pad_ftruncate
-202 STD { int sys_sysctl(const int *name, u_int namelen, \
+202 STD NOLOCK { int sys_sysctl(const int *name, u_int namelen, \
void *old, size_t *oldlenp, void *new, \
size_t newlen); }
203 STD { int sys_mlock(const void *addr, size_t len); }
Index: sys/kern/uipc_domain.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_domain.c,v
retrieving revision 1.60
diff -u -p -r1.60 uipc_domain.c
--- sys/kern/uipc_domain.c 14 Aug 2022 01:58:28 -0000 1.60
+++ sys/kern/uipc_domain.c 21 Apr 2023 13:48:13 -0000
@@ -183,8 +183,8 @@ net_link_sysctl(int *name, u_int namelen
}
int
-net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
- size_t newlen, struct proc *p)
+net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen, struct proc *p)
{
const struct domain *dp;
const struct protosw *pr;
@@ -213,9 +213,13 @@ net_sysctl(int *name, u_int namelen, voi
newp, newlen));
#endif
#if NPFLOW > 0
- if (family == PF_PFLOW)
- return (pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
- newp, newlen));
+ if (family == PF_PFLOW) {
+ KERNEL_LOCK();
+ error = pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
+ newp, newlen);
+ KERNEL_UNLOCK();
+ return (error);
+ }
#endif
#ifdef PIPEX
if (family == PF_PIPEX)
@@ -223,9 +227,13 @@ net_sysctl(int *name, u_int namelen, voi
newp, newlen));
#endif
#ifdef MPLS
- if (family == PF_MPLS)
- return (mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
- newp, newlen));
+ if (family == PF_MPLS) {
+ KERNEL_LOCK();
+ error = mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
+ newp, newlen);
+ KERNEL_UNLOCK();
+ return (error);
+ }
#endif
dp = pffinddomain(family);
if (dp == NULL)
@@ -236,8 +244,10 @@ net_sysctl(int *name, u_int namelen, voi
protocol = name[1];
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
if (pr->pr_protocol == protocol && pr->pr_sysctl) {
+ KERNEL_LOCK();
error = (*pr->pr_sysctl)(name + 2, namelen - 2,
oldp, oldlenp, newp, newlen);
+ KERNEL_UNLOCK();
return (error);
}
return (ENOPROTOOPT);