Module Name:    src
Committed By:   rmind
Date:           Sun Mar 20 23:19:16 UTC 2011

Modified Files:
        src/sys/kern: kern_lwp.c kern_mutex.c kern_rwlock.c

Log Message:
Optimise mutex_onproc() and rw_onproc() by making them O(1) instead of
O(ncpu) on the adaptive spin paths, renaming them to mutex_oncpu() and
rw_oncpu().  Add an LWP destructor, lwp_dtor(), with a comment describing
the principle of the cross-call barrier it provides.

Reviewed by yamt@ and ad@.
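
In outline, the change replaces the old scan of every CPU with a direct
check of the owner's l_cpu pointer.  A simplified sketch of the before
and after (condensed from the diffs below, not the verbatim code):

	/* Before: O(ncpu) -- scan all CPUs looking for the owning LWP. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci->ci_curlwp == l)
			return ci->ci_biglock_wanted != l;
	}
	return 0;

	/* After: O(1) -- trust l->l_cpu directly; the dereference is
	 * made safe by the cross-call barrier in lwp_dtor(). */
	ci = l->l_cpu;
	if (ci != NULL && ci->ci_curlwp == l)
		return ci->ci_biglock_wanted != l;
	return false;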


To generate a diff of this commit:
cvs rdiff -u -r1.156 -r1.157 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.49 -r1.50 src/sys/kern/kern_mutex.c
cvs rdiff -u -r1.36 -r1.37 src/sys/kern/kern_rwlock.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_lwp.c
diff -u src/sys/kern/kern_lwp.c:1.156 src/sys/kern/kern_lwp.c:1.157
--- src/sys/kern/kern_lwp.c:1.156	Mon Feb 21 20:23:28 2011
+++ src/sys/kern/kern_lwp.c	Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_lwp.c,v 1.156 2011/02/21 20:23:28 pooka Exp $	*/
+/*	$NetBSD: kern_lwp.c,v 1.157 2011/03/20 23:19:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -211,7 +211,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.156 2011/02/21 20:23:28 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.157 2011/03/20 23:19:16 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -240,6 +240,7 @@
 #include <sys/filedesc.h>
 #include <sys/dtrace_bsd.h>
 #include <sys/sdt.h>
+#include <sys/xcall.h>
 
 #include <uvm/uvm_extern.h>
 #include <uvm/uvm_object.h>
@@ -247,6 +248,8 @@
 static pool_cache_t	lwp_cache	__read_mostly;
 struct lwplist		alllwp		__cacheline_aligned;
 
+static void		lwp_dtor(void *, void *);
+
 /* DTrace proc provider probes */
 SDT_PROBE_DEFINE(proc,,,lwp_create,
 	"struct lwp *", NULL,
@@ -293,7 +296,7 @@
 	lwpinit_specificdata();
 	lwp_sys_init();
 	lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0,
-	    "lwppl", NULL, IPL_NONE, NULL, NULL, NULL);
+	    "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL);
 }
 
 void
@@ -318,6 +321,26 @@
 	SYSCALL_TIME_LWP_INIT(l);
 }
 
+static void
+lwp_dtor(void *arg, void *obj)
+{
+	lwp_t *l = obj;
+	uint64_t where;
+	(void)l;	/* avoid unused warning if KASSERT() compiles away */
+
+	/*
+	 * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu()
+	 * calls will exit before the LWP's memory is returned to the pool,
+	 * where the KVA of the LWP structure might be freed and re-used for
+	 * other purposes.  Kernel preemption is disabled around mutex_oncpu()
+	 * and rw_oncpu() callers, therefore a cross-call to all CPUs will do
+	 * the job.  Also, the value of l->l_cpu must still be valid here.
+	 */
+	KASSERT(l->l_cpu != NULL);
+	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
+	xc_wait(where);
+}
+
 /*
  * Set an LWP suspended.
  *
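
The barrier in lwp_dtor() above is a quiescence scheme: readers may
dereference a possibly-dying LWP only while kernel preemption is
disabled, and the destructor waits for every CPU to pass through a
preemption point before the LWP's memory can be recycled.  A minimal
sketch of the two sides (names as in the diff; locking elided):

	/* Reader side, e.g. mutex_oncpu() / rw_oncpu(): */
	kpreempt_disable();
	ci = l->l_cpu;			/* l may be exiting, but its	*/
	running = (ci != NULL &&	/* memory cannot be recycled	*/
	    ci->ci_curlwp == l);	/* underneath us		*/
	kpreempt_enable();

	/* Destructor side, lwp_dtor(): a no-op cross-call cannot
	 * complete until every CPU has scheduled, i.e. has left any
	 * preemption-disabled section that might hold a stale pointer. */
	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(where);
	/* Only now is the LWP returned to the pool cache. */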

Index: src/sys/kern/kern_mutex.c
diff -u src/sys/kern/kern_mutex.c:1.49 src/sys/kern/kern_mutex.c:1.50
--- src/sys/kern/kern_mutex.c:1.49	Mon Feb  8 09:54:27 2010
+++ src/sys/kern/kern_mutex.c	Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_mutex.c,v 1.49 2010/02/08 09:54:27 skrll Exp $	*/
+/*	$NetBSD: kern_mutex.c,v 1.50 2011/03/20 23:19:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -40,7 +40,7 @@
 #define	__MUTEX_PRIVATE
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.49 2010/02/08 09:54:27 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.50 2011/03/20 23:19:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -53,6 +53,7 @@
 #include <sys/kernel.h>
 #include <sys/intr.h>
 #include <sys/lock.h>
+#include <sys/types.h>
 
 #include <dev/lockstat.h>
 
@@ -249,9 +250,8 @@
 __strong_alias(mutex_spin_exit,mutex_vector_exit);
 #endif
 
-void	mutex_abort(kmutex_t *, const char *, const char *);
-void	mutex_dump(volatile void *);
-int	mutex_onproc(uintptr_t, struct cpu_info **);
+static void		mutex_abort(kmutex_t *, const char *, const char *);
+static void		mutex_dump(volatile void *);
 
 lockops_t mutex_spin_lockops = {
 	"Mutex",
@@ -379,44 +379,40 @@
 	MUTEX_DESTROY(mtx);
 }
 
+#ifdef MULTIPROCESSOR
 /*
- * mutex_onproc:
+ * mutex_oncpu:
  *
  *	Return true if an adaptive mutex owner is running on a CPU in the
  *	system.  If the target is waiting on the kernel big lock, then we
  *	must release it.  This is necessary to avoid deadlock.
- *
- *	Note that we can't use the mutex owner field as an LWP pointer.  We
- *	don't have full control over the timing of our execution, and so the
- *	pointer could be completely invalid by the time we dereference it.
  */
-#ifdef MULTIPROCESSOR
-int
-mutex_onproc(uintptr_t owner, struct cpu_info **cip)
+static bool
+mutex_oncpu(uintptr_t owner)
 {
-	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
-	struct lwp *l;
+	lwp_t *l;
 
-	if (!MUTEX_OWNED(owner))
-		return 0;
-	l = (struct lwp *)MUTEX_OWNER(owner);
+	KASSERT(kpreempt_disabled());
 
-	/* See if the target is running on a CPU somewhere. */
-	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
-		goto run;
-	for (CPU_INFO_FOREACH(cii, ci))
-		if (ci->ci_curlwp == l)
-			goto run;
+	if (!MUTEX_OWNED(owner)) {
+		return false;
+	}
 
-	/* No: it may be safe to block now. */
-	*cip = NULL;
-	return 0;
+	/*
+	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
+	 * We must have kernel preemption disabled for that.
+	 */
+	l = (lwp_t *)MUTEX_OWNER(owner);
+	ci = l->l_cpu;
+
+	if (ci && ci->ci_curlwp == l) {
+		/* Target is running; do we need to block? */
+		return (ci->ci_biglock_wanted != l);
+	}
 
- run:
- 	/* Target is running; do we need to block? */
- 	*cip = ci;
-	return ci->ci_biglock_wanted != l;
+	/* Not running.  It may be safe to block now. */
+	return false;
 }
 #endif	/* MULTIPROCESSOR */
 
@@ -434,7 +430,6 @@
 	uintptr_t owner, curthread;
 	turnstile_t *ts;
 #ifdef MULTIPROCESSOR
-	struct cpu_info *ci = NULL;
 	u_int count;
 #endif
 #ifdef KERN_SA
@@ -513,6 +508,7 @@
 	 * determine that the owner is not running on a processor,
 	 * then we stop spinning, and sleep instead.
 	 */
+	KPREEMPT_DISABLE(curlwp);
 	for (owner = mtx->mtx_owner;;) {
 		if (!MUTEX_OWNED(owner)) {
 			/*
@@ -529,27 +525,28 @@
 			owner = mtx->mtx_owner;
 			continue;
 		}
-
-		if (__predict_false(panicstr != NULL))
+		if (__predict_false(panicstr != NULL)) {
+			kpreempt_enable();
 			return;
-		if (__predict_false(MUTEX_OWNER(owner) == curthread))
+		}
+		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
 			MUTEX_ABORT(mtx, "locking against myself");
-
+		}
 #ifdef MULTIPROCESSOR
 		/*
 		 * Check to see if the owner is running on a processor.
 		 * If so, then we should just spin, as the owner will
 		 * likely release the lock very soon.
 		 */
-		if (mutex_onproc(owner, &ci)) {
+		if (mutex_oncpu(owner)) {
 			LOCKSTAT_START_TIMER(lsflag, spintime);
 			count = SPINLOCK_BACKOFF_MIN;
-			for (;;) {
+			do {
+				kpreempt_enable();
 				SPINLOCK_BACKOFF(count);
+				kpreempt_disable();
 				owner = mtx->mtx_owner;
-				if (!mutex_onproc(owner, &ci))
-					break;
-			}
+			} while (mutex_oncpu(owner));
 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
 			LOCKSTAT_COUNT(spincnt, 1);
 			if (!MUTEX_OWNED(owner))
@@ -589,7 +586,7 @@
 		 *		..	          clear lock word, waiters 
 		 *	  return success
 		 *
-		 * There is a another race that can occur: a third CPU could
+		 * There is another race that can occur: a third CPU could
 		 * acquire the mutex as soon as it is released.  Since
 		 * adaptive mutexes are primarily spin mutexes, this is not
 		 * something that we need to worry about too much.  What we
@@ -634,23 +631,23 @@
 		 * waiters field) and check the lock holder's status again.
 		 * Some of the possible outcomes (not an exhaustive list):
 		 *
-		 * 1. The onproc check returns true: the holding LWP is
+		 * 1. The on-CPU check returns true: the holding LWP is
 		 *    running again.  The lock may be released soon and
 		 *    we should spin.  Importantly, we can't trust the
 		 *    value of the waiters flag.
 		 *
-		 * 2. The onproc check returns false: the holding LWP is
+		 * 2. The on-CPU check returns false: the holding LWP is
 		 *    not running.  We now have the opportunity to check
 		 *    if mutex_exit() has blatted the modifications made
 		 *    by MUTEX_SET_WAITERS().
 		 *
-		 * 3. The onproc check returns false: the holding LWP may
+		 * 3. The on-CPU check returns false: the holding LWP may
 		 *    or may not be running.  It has context switched at
 		 *    some point during our check.  Again, we have the
 		 *    chance to see if the waiters bit is still set or
 		 *    has been overwritten.
 		 *
-		 * 4. The onproc check returns false: the holding LWP is
+		 * 4. The on-CPU check returns false: the holding LWP is
 		 *    running on a CPU, but wants the big lock.  It's OK
 		 *    to check the waiters field in this case.
 		 *
@@ -664,7 +661,7 @@
 		 * If the waiters bit is not set it's unsafe to go asleep,
 		 * as we might never be awoken.
 		 */
-		if ((membar_consumer(), mutex_onproc(owner, &ci)) ||
+		if ((membar_consumer(), mutex_oncpu(owner)) ||
 		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
 			turnstile_exit(mtx);
 			owner = mtx->mtx_owner;
@@ -695,6 +692,7 @@
 
 		owner = mtx->mtx_owner;
 	}
+	KPREEMPT_ENABLE(curlwp);
 
 	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
 	    slpcnt, slptime);
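
Note how the adaptive spin in mutex_vector_enter() now toggles kernel
preemption around the backoff: preemption only needs to be held off
across each mutex_oncpu() call, not for the whole wait.  Condensed from
the diff above (not the verbatim code):

	KPREEMPT_DISABLE(curlwp);
	if (mutex_oncpu(owner)) {
		count = SPINLOCK_BACKOFF_MIN;
		do {
			kpreempt_enable();	/* preemptible while waiting */
			SPINLOCK_BACKOFF(count);
			kpreempt_disable();	/* required for the re-check */
			owner = mtx->mtx_owner;
		} while (mutex_oncpu(owner));
	}
	KPREEMPT_ENABLE(curlwp);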

Index: src/sys/kern/kern_rwlock.c
diff -u src/sys/kern/kern_rwlock.c:1.36 src/sys/kern/kern_rwlock.c:1.37
--- src/sys/kern/kern_rwlock.c:1.36	Mon Feb  8 09:54:27 2010
+++ src/sys/kern/kern_rwlock.c	Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_rwlock.c,v 1.36 2010/02/08 09:54:27 skrll Exp $	*/
+/*	$NetBSD: kern_rwlock.c,v 1.37 2011/03/20 23:19:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.36 2010/02/08 09:54:27 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.37 2011/03/20 23:19:16 rmind Exp $");
 
 #define	__RWLOCK_PRIVATE
 
@@ -223,46 +223,39 @@
 }
 
 /*
- * rw_onproc:
+ * rw_oncpu:
  *
  *	Return true if an rwlock owner is running on a CPU in the system.
  *	If the target is waiting on the kernel big lock, then we must
  *	release it.  This is necessary to avoid deadlock.
- *
- *	Note that we can't use the rwlock owner field as an LWP pointer.  We
- *	don't have full control over the timing of our execution, and so the
- *	pointer could be completely invalid by the time we dereference it.
  */
-static int
-rw_onproc(uintptr_t owner, struct cpu_info **cip)
+static bool
+rw_oncpu(uintptr_t owner)
 {
 #ifdef MULTIPROCESSOR
-	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
 	lwp_t *l;
 
-	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
-		return 0;
+	KASSERT(kpreempt_disabled());
+
+	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
+		return false;
+	}
+
+	/*
+	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
+	 * We must have kernel preemption disabled for that.
+	 */
 	l = (lwp_t *)(owner & RW_THREAD);
+	ci = l->l_cpu;
 
-	/* See if the target is running on a CPU somewhere. */
-	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
-		goto run;
-	for (CPU_INFO_FOREACH(cii, ci))
-		if (ci->ci_curlwp == l)
-			goto run;
-
-	/* No: it may be safe to block now. */
-	*cip = NULL;
-	return 0;
-
- run:
- 	/* Target is running; do we need to block? */
- 	*cip = ci;
-	return ci->ci_biglock_wanted != l;
-#else
-	return 0;
-#endif	/* MULTIPROCESSOR */
+	if (ci && ci->ci_curlwp == l) {
+		/* Target is running; do we need to block? */
+		return (ci->ci_biglock_wanted != l);
+	}
+#endif
+	/* Not running.  It may be safe to block now. */
+	return false;
 }
 
 /*
@@ -274,7 +267,6 @@
 rw_vector_enter(krwlock_t *rw, const krw_t op)
 {
 	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
-	struct cpu_info *ci;
 	turnstile_t *ts;
 	int queue;
 	lwp_t *l;
@@ -319,7 +311,8 @@
 
 	LOCKSTAT_ENTER(lsflag);
 
-	for (ci = NULL, owner = rw->rw_owner;;) {
+	KPREEMPT_DISABLE(curlwp);
+	for (owner = rw->rw_owner; ;) {
 		/*
 		 * Read the lock owner field.  If the need-to-wait
 		 * indicator is clear, then try to acquire the lock.
@@ -340,23 +333,26 @@
 			owner = next;
 			continue;
 		}
-
-		if (__predict_false(panicstr != NULL))
+		if (__predict_false(panicstr != NULL)) {
+			kpreempt_enable();
 			return;
-		if (__predict_false(RW_OWNER(rw) == curthread))
+		}
+		if (__predict_false(RW_OWNER(rw) == curthread)) {
 			rw_abort(rw, __func__, "locking against myself");
-
+		}
 		/*
 		 * If the lock owner is running on another CPU, and
 		 * there are no existing waiters, then spin.
 		 */
-		if (rw_onproc(owner, &ci)) {
+		if (rw_oncpu(owner)) {
 			LOCKSTAT_START_TIMER(lsflag, spintime);
 			u_int count = SPINLOCK_BACKOFF_MIN;
 			do {
+				kpreempt_enable();
 				SPINLOCK_BACKOFF(count);
+				kpreempt_disable();
 				owner = rw->rw_owner;
-			} while (rw_onproc(owner, &ci));
+			} while (rw_oncpu(owner));
 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
 			LOCKSTAT_COUNT(spincnt, 1);
 			if ((owner & need_wait) == 0)
@@ -376,7 +372,7 @@
 		 * spun on the turnstile chain lock.
 		 */
 		owner = rw->rw_owner;
-		if ((owner & need_wait) == 0 || rw_onproc(owner, &ci)) {
+		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
 			turnstile_exit(rw);
 			continue;
 		}
@@ -399,6 +395,7 @@
 		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
 			break;
 	}
+	KPREEMPT_ENABLE(curlwp);
 
 	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
 	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
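
rw_oncpu() follows the same shape as mutex_oncpu(); the extra wrinkle
is that the owner LWP is packed into the lock word, and spinning is
only worthwhile when the lock is write-held with no waiters queued.
Condensed from the diff above:

	if ((owner & (RW_WRITE_LOCKED | RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
		return false;			/* read-held, or has waiters */
	l = (lwp_t *)(owner & RW_THREAD);	/* owner packed in lock word */
	ci = l->l_cpu;				/* safe: lwp_dtor() barrier */
	if (ci != NULL && ci->ci_curlwp == l)
		return ci->ci_biglock_wanted != l;
	return false;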
