Module Name: src
Committed By: riastradh
Date: Thu Mar 10 12:21:25 UTC 2022
Modified Files:
src/sys/kern: kern_lwp.c kern_synch.c
Log Message:
kern: Fix synchronization of clearing LP_RUNNING and lwp_free.
1. membar_sync is not necessary here -- only a store-release is
required.
2. membar_consumer _before_ loading l->l_pflag is not enough; a
load-acquire is required.
Actually it's not really clear to me why any barriers are needed, since
the store-release and load-acquire should be implied by releasing and
acquiring the lwp lock (and maybe we could spin with the lock instead
of reading l->l_pflag unlocked). But maybe there's something subtle
about access to l->l_mutex that's not obvious here.
To generate a diff of this commit:
cvs rdiff -u -r1.246 -r1.247 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.349 -r1.350 src/sys/kern/kern_synch.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/kern_lwp.c
diff -u src/sys/kern/kern_lwp.c:1.246 src/sys/kern/kern_lwp.c:1.247
--- src/sys/kern/kern_lwp.c:1.246 Wed Dec 22 16:57:28 2021
+++ src/sys/kern/kern_lwp.c Thu Mar 10 12:21:25 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_lwp.c,v 1.246 2021/12/22 16:57:28 thorpej Exp $ */
+/* $NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
@@ -217,7 +217,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.246 2021/12/22 16:57:28 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.247 2022/03/10 12:21:25 riastradh Exp $");
#include "opt_ddb.h"
#include "opt_lockdebug.h"
@@ -1017,16 +1017,19 @@ lwp_startup(struct lwp *prev, struct lwp
KASSERT(curcpu()->ci_mtx_count == -2);
/*
- * Immediately mark the previous LWP as no longer running and unlock
- * (to keep lock wait times short as possible). If a zombie, don't
- * touch after clearing LP_RUNNING as it could be reaped by another
- * CPU. Issue a memory barrier to ensure this.
+ * Immediately mark the previous LWP as no longer running and
+ * unlock (to keep lock wait times short as possible). If a
+ * zombie, don't touch after clearing LP_RUNNING as it could be
+ * reaped by another CPU. Use atomic_store_release to ensure
+ * this -- matches atomic_load_acquire in lwp_free.
*/
lock = prev->l_mutex;
if (__predict_false(prev->l_stat == LSZOMB)) {
- membar_sync();
+ atomic_store_release(&prev->l_pflag,
+ prev->l_pflag & ~LP_RUNNING);
+ } else {
+ prev->l_pflag &= ~LP_RUNNING;
}
- prev->l_pflag &= ~LP_RUNNING;
mutex_spin_exit(lock);
/* Correct spin mutex count after mi_switch(). */
@@ -1246,9 +1249,12 @@ lwp_free(struct lwp *l, bool recycle, bo
/*
* In the unlikely event that the LWP is still on the CPU,
* then spin until it has switched away.
+ *
+ * atomic_load_acquire matches atomic_store_release in
+ * lwp_startup and mi_switch.
*/
- membar_consumer();
- while (__predict_false((l->l_pflag & LP_RUNNING) != 0)) {
+ while (__predict_false((atomic_load_acquire(&l->l_pflag) & LP_RUNNING)
+ != 0)) {
SPINLOCK_BACKOFF_HOOK;
}
Index: src/sys/kern/kern_synch.c
diff -u src/sys/kern/kern_synch.c:1.349 src/sys/kern/kern_synch.c:1.350
--- src/sys/kern/kern_synch.c:1.349 Sat May 23 23:42:43 2020
+++ src/sys/kern/kern_synch.c Thu Mar 10 12:21:25 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_synch.c,v 1.349 2020/05/23 23:42:43 ad Exp $ */
+/* $NetBSD: kern_synch.c,v 1.350 2022/03/10 12:21:25 riastradh Exp $ */
/*-
* Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020
@@ -69,7 +69,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.349 2020/05/23 23:42:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.350 2022/03/10 12:21:25 riastradh Exp $");
#include "opt_kstack.h"
#include "opt_dtrace.h"
@@ -783,18 +783,23 @@ mi_switch(lwp_t *l)
/*
* Immediately mark the previous LWP as no longer running
- * and unlock (to keep lock wait times short as possible).
+ * and unlock (to keep lock wait times short as possible).
* We'll still be at IPL_SCHED afterwards. If a zombie,
* don't touch after clearing LP_RUNNING as it could be
* reaped by another CPU. Issue a memory barrier to ensure
* this.
+ *
+ * atomic_store_release matches atomic_load_acquire in
+ * lwp_free.
*/
KASSERT((prevlwp->l_pflag & LP_RUNNING) != 0);
lock = prevlwp->l_mutex;
if (__predict_false(prevlwp->l_stat == LSZOMB)) {
- membar_sync();
+ atomic_store_release(&prevlwp->l_pflag,
+ prevlwp->l_pflag & ~LP_RUNNING);
+ } else {
+ prevlwp->l_pflag &= ~LP_RUNNING;
}
- prevlwp->l_pflag &= ~LP_RUNNING;
mutex_spin_exit(lock);
/*