Module Name: src
Committed By: rmind
Date: Sun Mar 20 23:19:16 UTC 2011
Modified Files:
src/sys/kern: kern_lwp.c kern_mutex.c kern_rwlock.c
Log Message:
Optimise mutex_onproc() and rw_onproc() by making them O(1), instead
of O(ncpu) for adaptive paths. Add an LWP destructor, lwp_dtor() with
a comment describing the principle of this barrier.
Reviewed by yamt@ and ad@.
To generate a diff of this commit:
cvs rdiff -u -r1.156 -r1.157 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.49 -r1.50 src/sys/kern/kern_mutex.c
cvs rdiff -u -r1.36 -r1.37 src/sys/kern/kern_rwlock.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/kern_lwp.c
diff -u src/sys/kern/kern_lwp.c:1.156 src/sys/kern/kern_lwp.c:1.157
--- src/sys/kern/kern_lwp.c:1.156 Mon Feb 21 20:23:28 2011
+++ src/sys/kern/kern_lwp.c Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_lwp.c,v 1.156 2011/02/21 20:23:28 pooka Exp $ */
+/* $NetBSD: kern_lwp.c,v 1.157 2011/03/20 23:19:16 rmind Exp $ */
/*-
* Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -211,7 +211,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.156 2011/02/21 20:23:28 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.157 2011/03/20 23:19:16 rmind Exp $");
#include "opt_ddb.h"
#include "opt_lockdebug.h"
@@ -240,6 +240,7 @@
#include <sys/filedesc.h>
#include <sys/dtrace_bsd.h>
#include <sys/sdt.h>
+#include <sys/xcall.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>
@@ -247,6 +248,8 @@
static pool_cache_t lwp_cache __read_mostly;
struct lwplist alllwp __cacheline_aligned;
+static void lwp_dtor(void *, void *);
+
/* DTrace proc provider probes */
SDT_PROBE_DEFINE(proc,,,lwp_create,
"struct lwp *", NULL,
@@ -293,7 +296,7 @@
lwpinit_specificdata();
lwp_sys_init();
lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0,
- "lwppl", NULL, IPL_NONE, NULL, NULL, NULL);
+ "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL);
}
void
@@ -318,6 +321,26 @@
SYSCALL_TIME_LWP_INIT(l);
}
+static void
+lwp_dtor(void *arg, void *obj)
+{
+ lwp_t *l = obj;
+ uint64_t where;
+ (void)l;
+
+ /*
+ * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu()
+ * calls will exit before memory of LWP is returned to the pool, where
+ * KVA of LWP structure might be freed and re-used for other purposes.
+ * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu()
+ * callers, therefore cross-call to all CPUs will do the job. Also,
+ * the value of l->l_cpu must be still valid at this point.
+ */
+ KASSERT(l->l_cpu != NULL);
+ where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
+ xc_wait(where);
+}
+
/*
* Set an suspended.
*
Index: src/sys/kern/kern_mutex.c
diff -u src/sys/kern/kern_mutex.c:1.49 src/sys/kern/kern_mutex.c:1.50
--- src/sys/kern/kern_mutex.c:1.49 Mon Feb 8 09:54:27 2010
+++ src/sys/kern/kern_mutex.c Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_mutex.c,v 1.49 2010/02/08 09:54:27 skrll Exp $ */
+/* $NetBSD: kern_mutex.c,v 1.50 2011/03/20 23:19:16 rmind Exp $ */
/*-
* Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -40,7 +40,7 @@
#define __MUTEX_PRIVATE
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.49 2010/02/08 09:54:27 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.50 2011/03/20 23:19:16 rmind Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
@@ -53,6 +53,7 @@
#include <sys/kernel.h>
#include <sys/intr.h>
#include <sys/lock.h>
+#include <sys/types.h>
#include <dev/lockstat.h>
@@ -249,9 +250,8 @@
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif
-void mutex_abort(kmutex_t *, const char *, const char *);
-void mutex_dump(volatile void *);
-int mutex_onproc(uintptr_t, struct cpu_info **);
+static void mutex_abort(kmutex_t *, const char *, const char *);
+static void mutex_dump(volatile void *);
lockops_t mutex_spin_lockops = {
"Mutex",
@@ -379,44 +379,40 @@
MUTEX_DESTROY(mtx);
}
+#ifdef MULTIPROCESSOR
/*
- * mutex_onproc:
+ * mutex_oncpu:
*
* Return true if an adaptive mutex owner is running on a CPU in the
* system. If the target is waiting on the kernel big lock, then we
* must release it. This is necessary to avoid deadlock.
- *
- * Note that we can't use the mutex owner field as an LWP pointer. We
- * don't have full control over the timing of our execution, and so the
- * pointer could be completely invalid by the time we dereference it.
*/
-#ifdef MULTIPROCESSOR
-int
-mutex_onproc(uintptr_t owner, struct cpu_info **cip)
+static bool
+mutex_oncpu(uintptr_t owner)
{
- CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
- struct lwp *l;
+ lwp_t *l;
- if (!MUTEX_OWNED(owner))
- return 0;
- l = (struct lwp *)MUTEX_OWNER(owner);
+ KASSERT(kpreempt_disabled());
- /* See if the target is running on a CPU somewhere. */
- if ((ci = *cip) != NULL && ci->ci_curlwp == l)
- goto run;
- for (CPU_INFO_FOREACH(cii, ci))
- if (ci->ci_curlwp == l)
- goto run;
+ if (!MUTEX_OWNED(owner)) {
+ return false;
+ }
- /* No: it may be safe to block now. */
- *cip = NULL;
- return 0;
+ /*
+ * See lwp_dtor() why dereference of the LWP pointer is safe.
+ * We must have kernel preemption disabled for that.
+ */
+ l = (lwp_t *)MUTEX_OWNER(owner);
+ ci = l->l_cpu;
+
+ if (ci && ci->ci_curlwp == l) {
+ /* Target is running; do we need to block? */
+ return (ci->ci_biglock_wanted != l);
+ }
- run:
- /* Target is running; do we need to block? */
- *cip = ci;
- return ci->ci_biglock_wanted != l;
+ /* Not running. It may be safe to block now. */
+ return false;
}
#endif /* MULTIPROCESSOR */
@@ -434,7 +430,6 @@
uintptr_t owner, curthread;
turnstile_t *ts;
#ifdef MULTIPROCESSOR
- struct cpu_info *ci = NULL;
u_int count;
#endif
#ifdef KERN_SA
@@ -513,6 +508,7 @@
* determine that the owner is not running on a processor,
* then we stop spinning, and sleep instead.
*/
+ KPREEMPT_DISABLE(curlwp);
for (owner = mtx->mtx_owner;;) {
if (!MUTEX_OWNED(owner)) {
/*
@@ -529,27 +525,28 @@
owner = mtx->mtx_owner;
continue;
}
-
- if (__predict_false(panicstr != NULL))
+ if (__predict_false(panicstr != NULL)) {
+ kpreempt_enable();
return;
- if (__predict_false(MUTEX_OWNER(owner) == curthread))
+ }
+ if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
MUTEX_ABORT(mtx, "locking against myself");
-
+ }
#ifdef MULTIPROCESSOR
/*
* Check to see if the owner is running on a processor.
* If so, then we should just spin, as the owner will
* likely release the lock very soon.
*/
- if (mutex_onproc(owner, &ci)) {
+ if (mutex_oncpu(owner)) {
LOCKSTAT_START_TIMER(lsflag, spintime);
count = SPINLOCK_BACKOFF_MIN;
- for (;;) {
+ do {
+ kpreempt_enable();
SPINLOCK_BACKOFF(count);
+ kpreempt_disable();
owner = mtx->mtx_owner;
- if (!mutex_onproc(owner, &ci))
- break;
- }
+ } while (mutex_oncpu(owner));
LOCKSTAT_STOP_TIMER(lsflag, spintime);
LOCKSTAT_COUNT(spincnt, 1);
if (!MUTEX_OWNED(owner))
@@ -589,7 +586,7 @@
* .. clear lock word, waiters
* return success
*
- * There is a another race that can occur: a third CPU could
+ * There is another race that can occur: a third CPU could
* acquire the mutex as soon as it is released. Since
* adaptive mutexes are primarily spin mutexes, this is not
* something that we need to worry about too much. What we
@@ -634,23 +631,23 @@
* waiters field) and check the lock holder's status again.
* Some of the possible outcomes (not an exhaustive list):
*
- * 1. The onproc check returns true: the holding LWP is
+ * 1. The on-CPU check returns true: the holding LWP is
* running again. The lock may be released soon and
* we should spin. Importantly, we can't trust the
* value of the waiters flag.
*
- * 2. The onproc check returns false: the holding LWP is
+ * 2. The on-CPU check returns false: the holding LWP is
* not running. We now have the opportunity to check
* if mutex_exit() has blatted the modifications made
* by MUTEX_SET_WAITERS().
*
- * 3. The onproc check returns false: the holding LWP may
+ * 3. The on-CPU check returns false: the holding LWP may
* or may not be running. It has context switched at
* some point during our check. Again, we have the
* chance to see if the waiters bit is still set or
* has been overwritten.
*
- * 4. The onproc check returns false: the holding LWP is
+ * 4. The on-CPU check returns false: the holding LWP is
* running on a CPU, but wants the big lock. It's OK
* to check the waiters field in this case.
*
@@ -664,7 +661,7 @@
* If the waiters bit is not set it's unsafe to go asleep,
* as we might never be awoken.
*/
- if ((membar_consumer(), mutex_onproc(owner, &ci)) ||
+ if ((membar_consumer(), mutex_oncpu(owner)) ||
(membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
turnstile_exit(mtx);
owner = mtx->mtx_owner;
@@ -695,6 +692,7 @@
owner = mtx->mtx_owner;
}
+ KPREEMPT_ENABLE(curlwp);
LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
slpcnt, slptime);
Index: src/sys/kern/kern_rwlock.c
diff -u src/sys/kern/kern_rwlock.c:1.36 src/sys/kern/kern_rwlock.c:1.37
--- src/sys/kern/kern_rwlock.c:1.36 Mon Feb 8 09:54:27 2010
+++ src/sys/kern/kern_rwlock.c Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_rwlock.c,v 1.36 2010/02/08 09:54:27 skrll Exp $ */
+/* $NetBSD: kern_rwlock.c,v 1.37 2011/03/20 23:19:16 rmind Exp $ */
/*-
* Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.36 2010/02/08 09:54:27 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.37 2011/03/20 23:19:16 rmind Exp $");
#define __RWLOCK_PRIVATE
@@ -223,46 +223,39 @@
}
/*
- * rw_onproc:
+ * rw_oncpu:
*
* Return true if an rwlock owner is running on a CPU in the system.
* If the target is waiting on the kernel big lock, then we must
* release it. This is necessary to avoid deadlock.
- *
- * Note that we can't use the rwlock owner field as an LWP pointer. We
- * don't have full control over the timing of our execution, and so the
- * pointer could be completely invalid by the time we dereference it.
*/
-static int
-rw_onproc(uintptr_t owner, struct cpu_info **cip)
+static bool
+rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
- CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
lwp_t *l;
- if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
- return 0;
+ KASSERT(kpreempt_disabled());
+
+ if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
+ return false;
+ }
+
+ /*
+ * See lwp_dtor() why dereference of the LWP pointer is safe.
+ * We must have kernel preemption disabled for that.
+ */
l = (lwp_t *)(owner & RW_THREAD);
+ ci = l->l_cpu;
- /* See if the target is running on a CPU somewhere. */
- if ((ci = *cip) != NULL && ci->ci_curlwp == l)
- goto run;
- for (CPU_INFO_FOREACH(cii, ci))
- if (ci->ci_curlwp == l)
- goto run;
-
- /* No: it may be safe to block now. */
- *cip = NULL;
- return 0;
-
- run:
- /* Target is running; do we need to block? */
- *cip = ci;
- return ci->ci_biglock_wanted != l;
-#else
- return 0;
-#endif /* MULTIPROCESSOR */
+ if (ci && ci->ci_curlwp == l) {
+ /* Target is running; do we need to block? */
+ return (ci->ci_biglock_wanted != l);
+ }
+#endif
+ /* Not running. It may be safe to block now. */
+ return false;
}
/*
@@ -274,7 +267,6 @@
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
uintptr_t owner, incr, need_wait, set_wait, curthread, next;
- struct cpu_info *ci;
turnstile_t *ts;
int queue;
lwp_t *l;
@@ -319,7 +311,8 @@
LOCKSTAT_ENTER(lsflag);
- for (ci = NULL, owner = rw->rw_owner;;) {
+ KPREEMPT_DISABLE(curlwp);
+ for (owner = rw->rw_owner; ;) {
/*
* Read the lock owner field. If the need-to-wait
* indicator is clear, then try to acquire the lock.
@@ -340,23 +333,26 @@
owner = next;
continue;
}
-
- if (__predict_false(panicstr != NULL))
+ if (__predict_false(panicstr != NULL)) {
+ kpreempt_enable();
return;
- if (__predict_false(RW_OWNER(rw) == curthread))
+ }
+ if (__predict_false(RW_OWNER(rw) == curthread)) {
rw_abort(rw, __func__, "locking against myself");
-
+ }
/*
* If the lock owner is running on another CPU, and
* there are no existing waiters, then spin.
*/
- if (rw_onproc(owner, &ci)) {
+ if (rw_oncpu(owner)) {
LOCKSTAT_START_TIMER(lsflag, spintime);
u_int count = SPINLOCK_BACKOFF_MIN;
do {
+ kpreempt_enable();
SPINLOCK_BACKOFF(count);
+ kpreempt_disable();
owner = rw->rw_owner;
- } while (rw_onproc(owner, &ci));
+ } while (rw_oncpu(owner));
LOCKSTAT_STOP_TIMER(lsflag, spintime);
LOCKSTAT_COUNT(spincnt, 1);
if ((owner & need_wait) == 0)
@@ -376,7 +372,7 @@
* spun on the turnstile chain lock.
*/
owner = rw->rw_owner;
- if ((owner & need_wait) == 0 || rw_onproc(owner, &ci)) {
+ if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
turnstile_exit(rw);
continue;
}
@@ -399,6 +395,7 @@
if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
break;
}
+ KPREEMPT_ENABLE(curlwp);
LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
(op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);