Module Name:	src
Committed By:	rmind
Date:		Sun Mar 20 23:19:16 UTC 2011
Modified Files:
	src/sys/kern: kern_lwp.c kern_mutex.c kern_rwlock.c

Log Message:
Optimise mutex_onproc() and rw_onproc() by making them O(1), instead of
O(ncpu) for adaptive paths.  Add an LWP destructor, lwp_dtor(), with a
comment describing the principle of this barrier.

Reviewed by yamt@ and ad@.


To generate a diff of this commit:
cvs rdiff -u -r1.156 -r1.157 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.49 -r1.50 src/sys/kern/kern_mutex.c
cvs rdiff -u -r1.36 -r1.37 src/sys/kern/kern_rwlock.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
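[Editorial note] In short, the old mutex_onproc()/rw_onproc() walked every CPU
with CPU_INFO_FOREACH() looking for the lock owner, while the new
mutex_oncpu()/rw_oncpu() simply dereference the owner LWP's l_cpu field.  That
dereference is only legal because callers keep kernel preemption disabled and
because lwp_dtor() issues a cross-call barrier before the LWP's memory can be
recycled.  A condensed, illustrative sketch of the O(1) check follows; the
helper name owner_oncpu() is not part of the tree, the real code is
mutex_oncpu()/rw_oncpu() in the diff below.

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpu.h>
#include <sys/lwp.h>

/*
 * Sketch only: return true if LWP 'l' is currently running on a CPU and
 * is not itself waiting for the kernel big lock.  The caller must hold
 * kernel preemption disabled; see lwp_dtor() in the diff for why that
 * makes the l->l_cpu dereference safe.
 */
static bool
owner_oncpu(lwp_t *l)
{
	struct cpu_info *ci;

	KASSERT(kpreempt_disabled());

	/*
	 * O(1): consult the owner's last known CPU instead of scanning
	 * every CPU with CPU_INFO_FOREACH().
	 */
	ci = l->l_cpu;
	if (ci != NULL && ci->ci_curlwp == l) {
		/* Owner is on a CPU; spin unless it wants the big lock. */
		return ci->ci_biglock_wanted != l;
	}

	/* Not running anywhere; it may be safe to block instead. */
	return false;
}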
Modified files:

Index: src/sys/kern/kern_lwp.c
diff -u src/sys/kern/kern_lwp.c:1.156 src/sys/kern/kern_lwp.c:1.157
--- src/sys/kern/kern_lwp.c:1.156	Mon Feb 21 20:23:28 2011
+++ src/sys/kern/kern_lwp.c	Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_lwp.c,v 1.156 2011/02/21 20:23:28 pooka Exp $	*/
+/*	$NetBSD: kern_lwp.c,v 1.157 2011/03/20 23:19:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -211,7 +211,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.156 2011/02/21 20:23:28 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.157 2011/03/20 23:19:16 rmind Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -240,6 +240,7 @@
 #include <sys/filedesc.h>
 #include <sys/dtrace_bsd.h>
 #include <sys/sdt.h>
+#include <sys/xcall.h>
 
 #include <uvm/uvm_extern.h>
 #include <uvm/uvm_object.h>
@@ -247,6 +248,8 @@
 static pool_cache_t lwp_cache	__read_mostly;
 struct lwplist alllwp		__cacheline_aligned;
 
+static void		lwp_dtor(void *, void *);
+
 /* DTrace proc provider probes */
 SDT_PROBE_DEFINE(proc,,,lwp_create,
 	"struct lwp *", NULL,
@@ -293,7 +296,7 @@
 	lwpinit_specificdata();
 	lwp_sys_init();
 	lwp_cache = pool_cache_init(sizeof(lwp_t), MIN_LWP_ALIGNMENT, 0, 0,
-	    "lwppl", NULL, IPL_NONE, NULL, NULL, NULL);
+	    "lwppl", NULL, IPL_NONE, NULL, lwp_dtor, NULL);
 }
 
 void
@@ -318,6 +321,26 @@
 	SYSCALL_TIME_LWP_INIT(l);
 }
 
+static void
+lwp_dtor(void *arg, void *obj)
+{
+	lwp_t *l = obj;
+	uint64_t where;
+	(void)l;
+
+	/*
+	 * Provide a barrier to ensure that all mutex_oncpu() and rw_oncpu()
+	 * calls will exit before memory of LWP is returned to the pool, where
+	 * KVA of LWP structure might be freed and re-used for other purposes.
+	 * Kernel preemption is disabled around mutex_oncpu() and rw_oncpu()
+	 * callers, therefore cross-call to all CPUs will do the job.  Also,
+	 * the value of l->l_cpu must be still valid at this point.
+	 */
+	KASSERT(l->l_cpu != NULL);
+	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
+	xc_wait(where);
+}
+
 /*
  * Set an suspended.
  *

Index: src/sys/kern/kern_mutex.c
diff -u src/sys/kern/kern_mutex.c:1.49 src/sys/kern/kern_mutex.c:1.50
--- src/sys/kern/kern_mutex.c:1.49	Mon Feb 8 09:54:27 2010
+++ src/sys/kern/kern_mutex.c	Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_mutex.c,v 1.49 2010/02/08 09:54:27 skrll Exp $	*/
+/*	$NetBSD: kern_mutex.c,v 1.50 2011/03/20 23:19:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -40,7 +40,7 @@
 #define	__MUTEX_PRIVATE
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.49 2010/02/08 09:54:27 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.50 2011/03/20 23:19:16 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -53,6 +53,7 @@
 #include <sys/kernel.h>
 #include <sys/intr.h>
 #include <sys/lock.h>
+#include <sys/types.h>
 
 #include <dev/lockstat.h>
 
@@ -249,9 +250,8 @@
 __strong_alias(mutex_spin_exit,mutex_vector_exit);
 #endif
 
-void	mutex_abort(kmutex_t *, const char *, const char *);
-void	mutex_dump(volatile void *);
-int	mutex_onproc(uintptr_t, struct cpu_info **);
+static void	mutex_abort(kmutex_t *, const char *, const char *);
+static void	mutex_dump(volatile void *);
 
 lockops_t mutex_spin_lockops = {
 	"Mutex",
@@ -379,44 +379,40 @@
 	MUTEX_DESTROY(mtx);
 }
 
+#ifdef MULTIPROCESSOR
 /*
- * mutex_onproc:
+ * mutex_oncpu:
  *
  * Return true if an adaptive mutex owner is running on a CPU in the
  * system.  If the target is waiting on the kernel big lock, then we
  * must release it.  This is necessary to avoid deadlock.
- *
- * Note that we can't use the mutex owner field as an LWP pointer.  We
- * don't have full control over the timing of our execution, and so the
- * pointer could be completely invalid by the time we dereference it.
  */
-#ifdef MULTIPROCESSOR
-int
-mutex_onproc(uintptr_t owner, struct cpu_info **cip)
+static bool
+mutex_oncpu(uintptr_t owner)
 {
-	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
-	struct lwp *l;
+	lwp_t *l;
 
-	if (!MUTEX_OWNED(owner))
-		return 0;
-	l = (struct lwp *)MUTEX_OWNER(owner);
+	KASSERT(kpreempt_disabled());
 
-	/* See if the target is running on a CPU somewhere. */
-	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
-		goto run;
-	for (CPU_INFO_FOREACH(cii, ci))
-		if (ci->ci_curlwp == l)
-			goto run;
+	if (!MUTEX_OWNED(owner)) {
+		return false;
+	}
 
-	/* No: it may be safe to block now. */
-	*cip = NULL;
-	return 0;
+	/*
+	 * See lwp_dtor() why dereference of the LWP pointer is safe.
+	 * We must have kernel preemption disabled for that.
+	 */
+	l = (lwp_t *)MUTEX_OWNER(owner);
+	ci = l->l_cpu;
+
+	if (ci && ci->ci_curlwp == l) {
+		/* Target is running; do we need to block? */
+		return (ci->ci_biglock_wanted != l);
+	}
 
- run:
-	/* Target is running; do we need to block? */
-	*cip = ci;
-	return ci->ci_biglock_wanted != l;
+	/* Not running.  It may be safe to block now. */
+	return false;
 }
 #endif	/* MULTIPROCESSOR */
 
@@ -434,7 +430,6 @@
 	uintptr_t owner, curthread;
 	turnstile_t *ts;
 #ifdef MULTIPROCESSOR
-	struct cpu_info *ci = NULL;
 	u_int count;
 #endif
 #ifdef KERN_SA
@@ -513,6 +508,7 @@
 	 * determine that the owner is not running on a processor,
 	 * then we stop spinning, and sleep instead.
 	 */
+	KPREEMPT_DISABLE(curlwp);
 	for (owner = mtx->mtx_owner;;) {
 		if (!MUTEX_OWNED(owner)) {
 			/*
@@ -529,27 +525,28 @@
 			owner = mtx->mtx_owner;
 			continue;
 		}
-
-		if (__predict_false(panicstr != NULL))
+		if (__predict_false(panicstr != NULL)) {
+			kpreempt_enable();
 			return;
-		if (__predict_false(MUTEX_OWNER(owner) == curthread))
+		}
+		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
 			MUTEX_ABORT(mtx, "locking against myself");
-
+		}
 #ifdef MULTIPROCESSOR
 		/*
 		 * Check to see if the owner is running on a processor.
 		 * If so, then we should just spin, as the owner will
 		 * likely release the lock very soon.
 		 */
-		if (mutex_onproc(owner, &ci)) {
+		if (mutex_oncpu(owner)) {
 			LOCKSTAT_START_TIMER(lsflag, spintime);
 			count = SPINLOCK_BACKOFF_MIN;
-			for (;;) {
+			do {
+				kpreempt_enable();
 				SPINLOCK_BACKOFF(count);
+				kpreempt_disable();
 				owner = mtx->mtx_owner;
-				if (!mutex_onproc(owner, &ci))
-					break;
-			}
+			} while (mutex_oncpu(owner));
 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
 			LOCKSTAT_COUNT(spincnt, 1);
 			if (!MUTEX_OWNED(owner))
@@ -589,7 +586,7 @@
 	 *	     .. clear lock word, waiters
 	 *	  return success
 	 *
-	 * There is a another race that can occur: a third CPU could
+	 * There is another race that can occur: a third CPU could
	 * acquire the mutex as soon as it is released.  Since
	 * adaptive mutexes are primarily spin mutexes, this is not
	 * something that we need to worry about too much.  What we
@@ -634,23 +631,23 @@
	 *	  waiters field) and check the lock holder's status again.
	 *	  Some of the possible outcomes (not an exhaustive list):
	 *
-	 *	  1. The onproc check returns true: the holding LWP is
+	 *	  1. The on-CPU check returns true: the holding LWP is
	 *	     running again.  The lock may be released soon and
	 *	     we should spin.  Importantly, we can't trust the
	 *	     value of the waiters flag.
	 *
-	 *	  2. The onproc check returns false: the holding LWP is
+	 *	  2. The on-CPU check returns false: the holding LWP is
	 *	     not running.  We now have the opportunity to check
	 *	     if mutex_exit() has blatted the modifications made
	 *	     by MUTEX_SET_WAITERS().
	 *
-	 *	  3. The onproc check returns false: the holding LWP may
+	 *	  3. The on-CPU check returns false: the holding LWP may
	 *	     or may not be running.  It has context switched at
	 *	     some point during our check.  Again, we have the
	 *	     chance to see if the waiters bit is still set or
	 *	     has been overwritten.
	 *
-	 *	  4. The onproc check returns false: the holding LWP is
+	 *	  4. The on-CPU check returns false: the holding LWP is
	 *	     running on a CPU, but wants the big lock.  It's OK
	 *	     to check the waiters field in this case.
	 *
@@ -664,7 +661,7 @@
		 * If the waiters bit is not set it's unsafe to go asleep,
		 * as we might never be awoken.
		 */
-		if ((membar_consumer(), mutex_onproc(owner, &ci)) ||
+		if ((membar_consumer(), mutex_oncpu(owner)) ||
		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
@@ -695,6 +692,7 @@
		owner = mtx->mtx_owner;
	}
+	KPREEMPT_ENABLE(curlwp);
 
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
 

Index: src/sys/kern/kern_rwlock.c
diff -u src/sys/kern/kern_rwlock.c:1.36 src/sys/kern/kern_rwlock.c:1.37
--- src/sys/kern/kern_rwlock.c:1.36	Mon Feb 8 09:54:27 2010
+++ src/sys/kern/kern_rwlock.c	Sun Mar 20 23:19:16 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_rwlock.c,v 1.36 2010/02/08 09:54:27 skrll Exp $	*/
+/*	$NetBSD: kern_rwlock.c,v 1.37 2011/03/20 23:19:16 rmind Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.36 2010/02/08 09:54:27 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.37 2011/03/20 23:19:16 rmind Exp $");
 
 #define	__RWLOCK_PRIVATE
 
@@ -223,46 +223,39 @@
 }
 
 /*
- * rw_onproc:
+ * rw_oncpu:
  *
  * Return true if an rwlock owner is running on a CPU in the system.
  * If the target is waiting on the kernel big lock, then we must
  * release it.  This is necessary to avoid deadlock.
- *
- * Note that we can't use the rwlock owner field as an LWP pointer.  We
- * don't have full control over the timing of our execution, and so the
- * pointer could be completely invalid by the time we dereference it.
  */
-static int
-rw_onproc(uintptr_t owner, struct cpu_info **cip)
+static bool
+rw_oncpu(uintptr_t owner)
 {
 #ifdef MULTIPROCESSOR
-	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lwp_t *l;
 
-	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
-		return 0;
+	KASSERT(kpreempt_disabled());
+
+	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
+		return false;
+	}
+
+	/*
+	 * See lwp_dtor() why dereference of the LWP pointer is safe.
+	 * We must have kernel preemption disabled for that.
+	 */
	l = (lwp_t *)(owner & RW_THREAD);
+	ci = l->l_cpu;
 
-	/* See if the target is running on a CPU somewhere. */
-	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
-		goto run;
-	for (CPU_INFO_FOREACH(cii, ci))
-		if (ci->ci_curlwp == l)
-			goto run;
-
-	/* No: it may be safe to block now. */
-	*cip = NULL;
-	return 0;
-
- run:
-	/* Target is running; do we need to block? */
-	*cip = ci;
-	return ci->ci_biglock_wanted != l;
-#else
-	return 0;
-#endif	/* MULTIPROCESSOR */
+	if (ci && ci->ci_curlwp == l) {
+		/* Target is running; do we need to block? */
+		return (ci->ci_biglock_wanted != l);
+	}
+#endif
+	/* Not running.  It may be safe to block now. */
+	return false;
 }
 
 /*
@@ -274,7 +267,6 @@
 rw_vector_enter(krwlock_t *rw, const krw_t op)
 {
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
-	struct cpu_info *ci;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
@@ -319,7 +311,8 @@
 
	LOCKSTAT_ENTER(lsflag);
 
-	for (ci = NULL, owner = rw->rw_owner;;) {
+	KPREEMPT_DISABLE(curlwp);
+	for (owner = rw->rw_owner; ;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
@@ -340,23 +333,26 @@
			owner = next;
			continue;
		}
-
-		if (__predict_false(panicstr != NULL))
+		if (__predict_false(panicstr != NULL)) {
+			kpreempt_enable();
			return;
-		if (__predict_false(RW_OWNER(rw) == curthread))
+		}
+		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(rw, __func__, "locking against myself");
-
+		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
-		if (rw_onproc(owner, &ci)) {
+		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
+				kpreempt_enable();
				SPINLOCK_BACKOFF(count);
+				kpreempt_disable();
				owner = rw->rw_owner;
-			} while (rw_onproc(owner, &ci));
+			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
@@ -376,7 +372,7 @@
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
-		if ((owner & need_wait) == 0 || rw_onproc(owner, &ci)) {
+		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
@@ -399,6 +395,7 @@
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;
	}
+	KPREEMPT_ENABLE(curlwp);
 
	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
 
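[Editorial note] The other half of the change is the barrier itself: lwp_dtor()
is installed as the lwp_cache pool cache destructor, so a no-op cross-call must
complete on every CPU before an LWP's memory is returned to the pool.  Because
mutex_oncpu() and rw_oncpu() only dereference the owner pointer with kernel
preemption disabled, the broadcast cannot finish while any such caller could
still be looking at the LWP.  A minimal sketch of the same pattern, using a
hypothetical example_dtor() rather than anything in the tree:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/xcall.h>

/*
 * Hypothetical pool cache destructor showing the barrier principle used
 * by lwp_dtor(): broadcast a no-op cross-call and wait for it, so that
 * every preemption-disabled reader has drained before the object's
 * memory leaves the pool and may be reused.
 */
static void
example_dtor(void *arg, void *obj)
{
	uint64_t where;

	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(where);
}

/*
 * The destructor is wired up the same way as the lwp_cache change above,
 * e.g. (names illustrative):
 *
 *	pc = pool_cache_init(sizeof(struct example), 0, 0, 0,
 *	    "examplepl", NULL, IPL_NONE, NULL, example_dtor, NULL);
 */

The cost of the cross-call is paid once per object destruction, which is what
lets the hot-path owner check drop the O(ncpu) scan entirely.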