Thanks for all the inputs, updated diff below.
On 13/04/20(Mon) 03:20, Alexandr Nedvedicky wrote:
> On Sun, Apr 12, 2020 at 07:02:43PM +0200, Mark Kettenis wrote:
> > > From: "Theo de Raadt" <[email protected]>
> > > Date: Sun, 12 Apr 2020 10:28:59 -0600
> > >
> > > > + if ((p->p_flag & P_SYSTEM) &&
> > > > + (strncmp(p->p_p->ps_comm, "softnet", 7) == 0))
> > >
> > > Wow that is ugly.
> >
> > A better approach might be to store a pointer to the softnet task's
> > struct proc in a global variable and check that. That is what we do
> > for the pagedaemon for example.
Diff below implements that by introducing the in_taskq() function, any
comment on that approach?
> I'm not sure same thing would work for network task. Currently
> there is a single instance of pagedaemon, however we hope to
> have more network tasks running in parallel.
Let's concentrate on the present. More work is required to have
multiple network tasks running in parallel anyway ;)
Diff below uses NET_RLOCK_IN_SOFTNET() with a corresponding KASSERT()
and converts all the places where tasks are enqueued on the "softnet"
taskq.
Any other comment or concern?
Index: kern/kern_task.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_task.c,v
retrieving revision 1.27
diff -u -p -r1.27 kern_task.c
--- kern/kern_task.c 19 Dec 2019 17:40:11 -0000 1.27
+++ kern/kern_task.c 14 Apr 2020 07:49:38 -0000
@@ -47,6 +47,7 @@ struct taskq {
unsigned int tq_nthreads;
unsigned int tq_flags;
const char *tq_name;
+ struct proc **tq_threads;
struct mutex tq_mtx;
struct task_list tq_worklist;
@@ -55,6 +56,7 @@ struct taskq {
#endif
};
+struct proc *systqproc;
static const char taskq_sys_name[] = "systq";
struct taskq taskq_sys = {
@@ -64,6 +66,7 @@ struct taskq taskq_sys = {
1,
0,
taskq_sys_name,
+ &systqproc,
MUTEX_INITIALIZER(IPL_HIGH),
TAILQ_HEAD_INITIALIZER(taskq_sys.tq_worklist),
#ifdef WITNESS
@@ -74,6 +77,7 @@ struct taskq taskq_sys = {
#endif
};
+struct proc *systqmpproc;
static const char taskq_sys_mp_name[] = "systqmp";
struct taskq taskq_sys_mp = {
@@ -83,6 +87,7 @@ struct taskq taskq_sys_mp = {
1,
TASKQ_MPSAFE,
taskq_sys_mp_name,
+ &systqmpproc,
MUTEX_INITIALIZER(IPL_HIGH),
TAILQ_HEAD_INITIALIZER(taskq_sys_mp.tq_worklist),
#ifdef WITNESS
@@ -129,6 +134,8 @@ taskq_create(const char *name, unsigned
tq->tq_nthreads = nthreads;
tq->tq_name = name;
tq->tq_flags = flags;
+ tq->tq_threads = mallocarray(nthreads, sizeof(*tq->tq_threads),
+ M_DEVBUF, M_WAITOK|M_ZERO);
mtx_init_flags(&tq->tq_mtx, ipl, name, 0);
TAILQ_INIT(&tq->tq_worklist);
@@ -172,6 +179,8 @@ taskq_destroy(struct taskq *tq)
}
mtx_leave(&tq->tq_mtx);
+ free(tq->tq_threads, M_DEVBUF,
+ tq->tq_nthreads * sizeof(*tq->tq_threads));
free(tq, M_DEVBUF, sizeof(*tq));
}
@@ -186,6 +195,8 @@ taskq_create_thread(void *arg)
switch (tq->tq_state) {
case TQ_S_DESTROYED:
mtx_leave(&tq->tq_mtx);
+ free(tq->tq_threads, M_DEVBUF,
+ tq->tq_nthreads * sizeof(*tq->tq_threads));
free(tq, M_DEVBUF, sizeof(*tq));
return;
@@ -356,7 +367,17 @@ taskq_thread(void *xtq)
{
struct taskq *tq = xtq;
struct task work;
- int last;
+ int last, i;
+
+ mtx_enter(&tq->tq_mtx);
+ for (i = 0; i < tq->tq_nthreads; i++) {
+ if (tq->tq_threads[i] == NULL) {
+ tq->tq_threads[i] = curproc;
+ break;
+ }
+ }
+ KASSERT(i < tq->tq_nthreads);
+ mtx_leave(&tq->tq_mtx);
if (ISSET(tq->tq_flags, TASKQ_MPSAFE))
KERNEL_UNLOCK();
@@ -381,4 +402,17 @@ taskq_thread(void *xtq)
wakeup_one(&tq->tq_running);
kthread_exit(0);
+}
+
+int
+in_taskq(struct taskq *tq)
+{
+ int i;
+
+ for (i = 0; i < tq->tq_nthreads; i++) {
+ if (curproc == tq->tq_threads[i])
+ return (1);
+ }
+
+ return (0);
}
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.603
diff -u -p -r1.603 if.c
--- net/if.c 12 Apr 2020 07:04:03 -0000 1.603
+++ net/if.c 14 Apr 2020 07:34:49 -0000
@@ -250,6 +250,47 @@ struct task if_input_task_locked = TASK_
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
/*
+ * Reader version of NET_LOCK() to be used in "softnet" thread only.
+
+ * The "softnet" thread should be the only thread processing packets
+ * without holding an exclusive lock. This is done to allow read-only
+ * ioctl(2) to not block.
+ */
+void
+NET_RLOCK_IN_SOFTNET(void)
+{
+ KASSERT(in_taskq(net_tq(0)));
+ rw_enter_read(&netlock);
+}
+
+void
+NET_RUNLOCK_IN_SOFTNET(void)
+{
+ KASSERT(in_taskq(net_tq(0)));
+ rw_exit_read(&netlock);
+}
+
+/*
+ * Reader version of NET_LOCK() to be used in ioctl/sysctl path only.
+ *
+ * Can be grabbed instead of the exclusive version when no field
+ * protected by the NET_LOCK() is modified by the ioctl/sysctl.
+ */
+void
+NET_RLOCK_IN_IOCTL(void)
+{
+ KASSERT((curproc->p_flag & P_SYSTEM) == 0);
+ rw_enter_read(&netlock);
+}
+
+void
+NET_RUNLOCK_IN_IOCTL(void)
+{
+ KASSERT((curproc->p_flag & P_SYSTEM) == 0);
+ rw_exit_read(&netlock);
+}
+
+/*
* Network interface utility routines.
*/
void
@@ -936,7 +977,7 @@ if_input_process(struct ifnet *ifp, stru
*
* Since we have a NET_LOCK() we also use it to serialize access
* to PF globals, pipex globals, unicast and multicast addresses
- * lists.
+ * lists and the socket layer.
*/
NET_LOCK();
while ((m = ml_dequeue(ml)) != NULL)
@@ -2338,27 +2379,27 @@ ifioctl_get(u_long cmd, caddr_t data)
switch(cmd) {
case SIOCGIFCONF:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = ifconf(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
case SIOCIFGCLONERS:
error = if_clone_list((struct if_clonereq *)data);
return (error);
case SIOCGIFGMEMB:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = if_getgroupmembers(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
case SIOCGIFGATTR:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = if_getgroupattribs(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
case SIOCGIFGLIST:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = if_getgrouplist(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
@@ -2366,7 +2407,7 @@ ifioctl_get(u_long cmd, caddr_t data)
if (ifp == NULL)
return (ENXIO);
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
switch(cmd) {
case SIOCGIFFLAGS:
@@ -2434,7 +2475,7 @@ ifioctl_get(u_long cmd, caddr_t data)
panic("invalid ioctl %lu", cmd);
}
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
Index: net/if_gre.c
===================================================================
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.156
diff -u -p -r1.156 if_gre.c
--- net/if_gre.c 12 Apr 2020 11:56:52 -0000 1.156
+++ net/if_gre.c 14 Apr 2020 07:05:54 -0000
@@ -3904,12 +3904,12 @@ nvgre_send4(struct nvgre_softc *sc, stru
imo.imo_ttl = sc->sc_tunnel.t_ttl;
imo.imo_loop = 0;
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(ml)) != NULL) {
if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
oerrors++;
}
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
return (oerrors);
}
@@ -3926,12 +3926,12 @@ nvgre_send6(struct nvgre_softc *sc, stru
im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
im6o.im6o_loop = 0;
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(ml)) != NULL) {
if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
oerrors++;
}
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
return (oerrors);
}
Index: net/if_pfsync.c
===================================================================
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.270
diff -u -p -r1.270 if_pfsync.c
--- net/if_pfsync.c 12 Apr 2020 11:56:52 -0000 1.270
+++ net/if_pfsync.c 14 Apr 2020 07:05:54 -0000
@@ -1523,7 +1523,7 @@ pfsync_send_dispatch(void *xmq)
if (ml_empty(&ml))
return;
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
sc = pfsyncif;
if (sc == NULL) {
ml_purge(&ml);
@@ -1541,7 +1541,7 @@ pfsync_send_dispatch(void *xmq)
}
}
done:
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
}
void
Index: net/if_vxlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_vxlan.c
--- net/if_vxlan.c 12 Apr 2020 11:56:52 -0000 1.77
+++ net/if_vxlan.c 14 Apr 2020 07:05:54 -0000
@@ -329,11 +329,11 @@ vxlan_send_dispatch(void *xsc)
if (ml_empty(&ml))
return;
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(&ml)) != NULL) {
vxlan_output(ifp, m);
}
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
}
Index: net/pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.350
diff -u -p -r1.350 pf_ioctl.c
--- net/pf_ioctl.c 12 Apr 2020 11:56:52 -0000 1.350
+++ net/pf_ioctl.c 14 Apr 2020 07:04:54 -0000
@@ -2921,12 +2921,12 @@ pf_sysctl(void *oldp, size_t *oldlenp, v
{
struct pf_status pfs;
- NET_LOCK();
+ NET_RLOCK_IN_IOCTL();
PF_LOCK();
memcpy(&pfs, &pf_status, sizeof(struct pf_status));
pfi_update_status(pfs.ifname, &pfs);
PF_UNLOCK();
- NET_UNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs));
}
Index: netinet/in.c
===================================================================
RCS file: /cvs/src/sys/netinet/in.c,v
retrieving revision 1.169
diff -u -p -r1.169 in.c
--- netinet/in.c 15 Mar 2020 05:34:13 -0000 1.169
+++ netinet/in.c 12 Apr 2020 12:16:59 -0000
@@ -569,7 +569,7 @@ in_ioctl_get(u_long cmd, caddr_t data, s
return (error);
}
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
if (ifa->ifa_addr->sa_family != AF_INET)
@@ -620,7 +620,7 @@ in_ioctl_get(u_long cmd, caddr_t data, s
}
err:
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.348
diff -u -p -r1.348 ip_input.c
--- netinet/ip_input.c 12 Apr 2020 11:56:52 -0000 1.348
+++ netinet/ip_input.c 14 Apr 2020 07:05:54 -0000
@@ -1784,11 +1784,11 @@ ip_send_dispatch(void *xmq)
if (ml_empty(&ml))
return;
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(&ml)) != NULL) {
ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
}
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
}
void
Index: netinet/ip_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.129
diff -u -p -r1.129 ip_mroute.c
--- netinet/ip_mroute.c 15 Mar 2020 05:34:13 -0000 1.129
+++ netinet/ip_mroute.c 12 Apr 2020 12:17:07 -0000
@@ -267,16 +267,16 @@ mrt_ioctl(struct socket *so, u_long cmd,
else
switch (cmd) {
case SIOCGETVIFCNT:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = get_vif_cnt(inp->inp_rtableid,
(struct sioc_vif_req *)data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
break;
case SIOCGETSGCNT:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = get_sg_cnt(inp->inp_rtableid,
(struct sioc_sg_req *)data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
break;
default:
error = ENOTTY;
Index: netinet6/in6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.235
diff -u -p -r1.235 in6.c
--- netinet6/in6.c 15 Mar 2020 05:34:14 -0000 1.235
+++ netinet6/in6.c 12 Apr 2020 12:17:25 -0000
@@ -412,7 +412,7 @@ in6_ioctl_get(u_long cmd, caddr_t data,
return (error);
}
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
if (sa6 != NULL) {
error = in6_check_embed_scope(sa6, ifp->if_index);
@@ -506,7 +506,7 @@ in6_ioctl_get(u_long cmd, caddr_t data,
}
err:
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
Index: netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.225
diff -u -p -r1.225 ip6_input.c
--- netinet6/ip6_input.c 12 Apr 2020 11:56:53 -0000 1.225
+++ netinet6/ip6_input.c 14 Apr 2020 07:05:54 -0000
@@ -1455,7 +1455,7 @@ ip6_send_dispatch(void *xmq)
if (ml_empty(&ml))
return;
- NET_LOCK();
+ NET_RLOCK_IN_SOFTNET();
while ((m = ml_dequeue(&ml)) != NULL) {
/*
* To avoid a "too big" situation at an intermediate router and
@@ -1466,7 +1466,7 @@ ip6_send_dispatch(void *xmq)
*/
ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
}
- NET_UNLOCK();
+ NET_RUNLOCK_IN_SOFTNET();
}
void
Index: netinet6/ip6_mroute.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_mroute.c,v
retrieving revision 1.123
diff -u -p -r1.123 ip6_mroute.c
--- netinet6/ip6_mroute.c 15 Mar 2020 05:34:14 -0000 1.123
+++ netinet6/ip6_mroute.c 12 Apr 2020 12:17:32 -0000
@@ -250,16 +250,16 @@ mrt6_ioctl(struct socket *so, u_long cmd
switch (cmd) {
case SIOCGETSGCNT_IN6:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = get_sg6_cnt((struct sioc_sg_req6 *)data,
inp->inp_rtableid);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
break;
case SIOCGETMIFCNT_IN6:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = get_mif6_cnt((struct sioc_mif_req6 *)data,
inp->inp_rtableid);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
break;
default:
error = ENOTTY;
Index: netinet6/nd6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.229
diff -u -p -r1.229 nd6.c
--- netinet6/nd6.c 29 Nov 2019 16:41:01 -0000 1.229
+++ netinet6/nd6.c 12 Apr 2020 12:17:48 -0000
@@ -1021,9 +1021,9 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
switch (cmd) {
case SIOCGIFINFO_IN6:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
ndi->ndi = *ND_IFINFO(ifp);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (0);
case SIOCGNBRINFO_IN6:
{
@@ -1031,7 +1031,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
struct in6_addr nb_addr = nbi->addr; /* make local for safety */
time_t expire;
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
/*
* XXX: KAME specific hack for scoped addresses
* XXXX: for other scopes than link-local?
@@ -1048,7 +1048,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
if (rt == NULL ||
(ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) {
rtfree(rt);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (EINVAL);
}
expire = ln->ln_rt->rt_expire;
@@ -1063,7 +1063,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
nbi->expire = expire;
rtfree(rt);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (0);
}
}
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.145
diff -u -p -r1.145 systm.h
--- sys/systm.h 20 Mar 2020 03:37:08 -0000 1.145
+++ sys/systm.h 14 Apr 2020 07:05:54 -0000
@@ -315,37 +315,32 @@ int uiomove(void *, size_t, struct uio *
extern struct rwlock netlock;
-#define NET_LOCK() NET_WLOCK()
-#define NET_UNLOCK() NET_WUNLOCK()
-#define NET_ASSERT_UNLOCKED() NET_ASSERT_WUNLOCKED()
+/*
+ * Network stack data structures are, unless stated otherwise, protected
+ * by the NET_LOCK(). It's a single non-recursive lock for the whole
+ * subsystem.
+ */
+#define NET_LOCK() do { rw_enter_write(&netlock); } while (0)
+#define NET_UNLOCK() do { rw_exit_write(&netlock); } while (0)
-
-#define NET_WLOCK() do { rw_enter_write(&netlock); } while (0)
-#define NET_WUNLOCK() do { rw_exit_write(&netlock); } while (0)
-
-#define NET_ASSERT_WLOCKED()
\
-do { \
- int _s = rw_status(&netlock); \
- if ((splassert_ctl > 0) && (_s != RW_WRITE)) \
- splassert_fail(RW_WRITE, _s, __func__); \
-} while (0)
-
-#define NET_ASSERT_WUNLOCKED()
\
+#define NET_ASSERT_UNLOCKED()
\
do { \
int _s = rw_status(&netlock); \
if ((splassert_ctl > 0) && (_s == RW_WRITE)) \
splassert_fail(0, RW_WRITE, __func__); \
} while (0)
-#define NET_RLOCK() do { rw_enter_read(&netlock); } while (0)
-#define NET_RUNLOCK() do { rw_exit_read(&netlock); } while (0)
-
#define NET_ASSERT_LOCKED()
\
do { \
int _s = rw_status(&netlock); \
if ((splassert_ctl > 0) && (_s != RW_WRITE && _s != RW_READ)) \
splassert_fail(RW_READ, _s, __func__); \
} while (0)
+
+void NET_RLOCK_IN_SOFTNET(void);
+void NET_RUNLOCK_IN_SOFTNET(void);
+void NET_RLOCK_IN_IOCTL(void);
+void NET_RUNLOCK_IN_IOCTL(void);
__returns_twice int setjmp(label_t *);
__dead void longjmp(label_t *);
Index: sys/task.h
===================================================================
RCS file: /cvs/src/sys/sys/task.h,v
retrieving revision 1.16
diff -u -p -r1.16 task.h
--- sys/task.h 23 Jun 2019 12:56:10 -0000 1.16
+++ sys/task.h 14 Apr 2020 07:34:14 -0000
@@ -48,6 +48,7 @@ void taskq_destroy(struct taskq *);
void taskq_barrier(struct taskq *);
void taskq_del_barrier(struct taskq *, struct task *);
+int in_taskq(struct taskq *);
void task_set(struct task *, void (*)(void *), void *);
int task_add(struct taskq *, struct task *);