Module Name:	src
Committed By:	ad
Date:		Sun Jan 19 21:08:30 UTC 2020
Modified Files:
	src/sys/arch/amd64/amd64 [ad-namecache]: genassym.cf lock_stubs.S
	src/sys/arch/i386/i386 [ad-namecache]: genassym.cf lock_stubs.S
	src/sys/kern [ad-namecache]: kern_rwlock.c kern_synch.c
	src/sys/sys [ad-namecache]: lwp.h

Log Message:
Adaptive rwlocks proposed on tech-kern and working well on this branch
with vnode locks.

To generate a diff of this commit:
cvs rdiff -u -r1.80.2.1 -r1.80.2.2 src/sys/arch/amd64/amd64/genassym.cf
cvs rdiff -u -r1.35 -r1.35.2.1 src/sys/arch/amd64/amd64/lock_stubs.S
cvs rdiff -u -r1.117.2.1 -r1.117.2.2 src/sys/arch/i386/i386/genassym.cf
cvs rdiff -u -r1.32 -r1.32.2.1 src/sys/arch/i386/i386/lock_stubs.S
cvs rdiff -u -r1.59.2.2 -r1.59.2.3 src/sys/kern/kern_rwlock.c
cvs rdiff -u -r1.334.2.1 -r1.334.2.2 src/sys/kern/kern_synch.c
cvs rdiff -u -r1.192.2.1 -r1.192.2.2 src/sys/sys/lwp.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
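[Editor's note, for orientation before the diff: the heart of the change is
that the lock word now carries an RW_SPIN bit.  Waiters busy-wait while the
holders are on CPU and the bit is set; mi_switch() clears it when a holder
goes off CPU, at which point waiters fall back to sleeping on a turnstile.
Below is a minimal, userland-only sketch of that policy.  The model_* names,
the MODEL_* flag values and the use of sched_yield() in place of a turnstile
sleep are all invented for illustration; they are not part of the commit.]

#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

#define	MODEL_WRITE_LOCKED	((uintptr_t)0x01)	/* write held */
#define	MODEL_SPIN		((uintptr_t)0x02)	/* holder is on CPU */
#define	MODEL_FLAGMASK		((uintptr_t)0x0f)

typedef struct {
	_Atomic uintptr_t rw_owner;	/* owner address | flag bits */
} model_rwlock_t;

/*
 * Writer acquire: if the lock is free, CAS ourselves in; if it is held
 * and the holder is on CPU (MODEL_SPIN set), busy-wait; otherwise stop
 * spinning and block (sched_yield() stands in for a turnstile sleep).
 */
static void
model_write_enter(model_rwlock_t *rw, uintptr_t self)
{
	uintptr_t owner;

	for (;;) {
		owner = atomic_load(&rw->rw_owner);
		if ((owner & ~MODEL_FLAGMASK) == 0) {
			if (atomic_compare_exchange_weak(&rw->rw_owner,
			    &owner, self | MODEL_WRITE_LOCKED | MODEL_SPIN))
				return;		/* got it */
		} else if ((owner & MODEL_SPIN) != 0) {
			/* holder on CPU, release should be soon: spin */
		} else {
			sched_yield();		/* holder off CPU: block */
		}
	}
}

/* What rw_switch() does for real: going off CPU, stop waiters spinning. */
static void
model_off_cpu(model_rwlock_t *rw)
{

	atomic_fetch_and(&rw->rw_owner, ~MODEL_SPIN);
}

/* Release: mark the lock free and turn spinning back on for waiters. */
static void
model_write_exit(model_rwlock_t *rw)
{

	atomic_store(&rw->rw_owner, MODEL_SPIN);
}

[The real code below additionally handles readers, waiter bits, lockstat,
priority inheritance, and the per-LWP hold tracking in l_rwlocks[].]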
Modified files:

Index: src/sys/arch/amd64/amd64/genassym.cf
diff -u src/sys/arch/amd64/amd64/genassym.cf:1.80.2.1 src/sys/arch/amd64/amd64/genassym.cf:1.80.2.2
--- src/sys/arch/amd64/amd64/genassym.cf:1.80.2.1	Fri Jan 17 21:47:22 2020
+++ src/sys/arch/amd64/amd64/genassym.cf	Sun Jan 19 21:08:29 2020
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.80.2.1 2020/01/17 21:47:22 ad Exp $
+#	$NetBSD: genassym.cf,v 1.80.2.2 2020/01/19 21:08:29 ad Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -78,7 +78,6 @@ include <sys/resourcevar.h>
 include <sys/device.h>
 include <sys/mbuf.h>
 include <sys/mutex.h>
-include <sys/rwlock.h>
 include <sys/cpu_data.h>
 include <sys/evcnt.h>
 include <sys/cpu.h>
@@ -346,15 +345,6 @@ define	MTX_IPL			offsetof(struct kmutex,
 define	MTX_LOCK		offsetof(struct kmutex, u.s.mtxs_lock)
 define	MTX_OWNER		offsetof(struct kmutex, u.mtxa_owner)
 
-define	RW_OWNER		offsetof(struct krwlock, rw_owner)
-define	RW_WRITE_LOCKED		RW_WRITE_LOCKED
-define	RW_WRITE_WANTED		RW_WRITE_WANTED
-define	RW_READ_INCR		RW_READ_INCR
-define	RW_HAS_WAITERS		RW_HAS_WAITERS
-define	RW_THREAD		RW_THREAD
-define	RW_READER		RW_READER
-define	RW_WRITER		RW_WRITER
-
 define	EV_COUNT		offsetof(struct evcnt, ev_count)
 
 define	OPTERON_MSR_PASSCODE	OPTERON_MSR_PASSCODE

Index: src/sys/arch/amd64/amd64/lock_stubs.S
diff -u src/sys/arch/amd64/amd64/lock_stubs.S:1.35 src/sys/arch/amd64/amd64/lock_stubs.S:1.35.2.1
--- src/sys/arch/amd64/amd64/lock_stubs.S:1.35	Sun Dec  8 20:00:56 2019
+++ src/sys/arch/amd64/amd64/lock_stubs.S	Sun Jan 19 21:08:29 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: lock_stubs.S,v 1.35 2019/12/08 20:00:56 ad Exp $	*/
+/*	$NetBSD: lock_stubs.S,v 1.35.2.1 2020/01/19 21:08:29 ad Exp $	*/
 
 /*
  * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -185,126 +185,6 @@ ENTRY(mutex_spin_exit)
 END(mutex_spin_exit)
 
-/*
- * void	rw_enter(krwlock_t *rwl, krw_t op);
- *
- *	Acquire one hold on a RW lock.
- */
-ENTRY(rw_enter)
-	cmpl	$RW_READER, %esi
-	jne	2f
-
-	/*
-	 * Reader: this is the most common case.
-	 */
-	movq	(%rdi), %rax
-0:
-	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	3f
-	leaq	RW_READ_INCR(%rax), %rdx
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	1f
-	RET
-1:
-	jmp	0b
-
-	/*
-	 * Writer: if the compare-and-set fails, don't bother retrying.
-	 */
-2:	movq	CPUVAR(CURLWP), %rcx
-	xorq	%rax, %rax
-	orq	$RW_WRITE_LOCKED, %rcx
-	LOCK
-	cmpxchgq %rcx, (%rdi)
-	jnz	3f
-	RET
-3:
-	jmp	_C_LABEL(rw_vector_enter)
-END(rw_enter)
-
-/*
- * void	rw_exit(krwlock_t *rwl);
- *
- *	Release one hold on a RW lock.
- */
-ENTRY(rw_exit)
-	movq	(%rdi), %rax
-	testb	$RW_WRITE_LOCKED, %al
-	jnz	2f
-
-	/*
-	 * Reader
-	 */
-0:	testb	$RW_HAS_WAITERS, %al
-	jnz	3f
-	cmpq	$RW_READ_INCR, %rax
-	jb	3f
-	leaq	-RW_READ_INCR(%rax), %rdx
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	1f
-	ret
-1:
-	jmp	0b
-
-	/*
-	 * Writer
-	 */
-2:	leaq	-RW_WRITE_LOCKED(%rax), %rdx
-	subq	CPUVAR(CURLWP), %rdx
-	jnz	3f
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	3f
-	ret
-
-3:	jmp	_C_LABEL(rw_vector_exit)
-END(rw_exit)
-
-/*
- * int	rw_tryenter(krwlock_t *rwl, krw_t op);
- *
- *	Try to acquire one hold on a RW lock.
- */
-ENTRY(rw_tryenter)
-	cmpl	$RW_READER, %esi
-	jne	2f
-
-	/*
-	 * Reader: this is the most common case.
-	 */
-	movq	(%rdi), %rax
-0:
-	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	4f
-	leaq	RW_READ_INCR(%rax), %rdx
-	LOCK
-	cmpxchgq %rdx, (%rdi)
-	jnz	1f
-	movl	%edx, %eax	/* nonzero */
-	RET
-1:
-	jmp	0b
-
-	/*
-	 * Writer: if the compare-and-set fails, don't bother retrying.
-	 */
-2:	movq	CPUVAR(CURLWP), %rcx
-	xorq	%rax, %rax
-	orq	$RW_WRITE_LOCKED, %rcx
-	LOCK
-	cmpxchgq %rcx, (%rdi)
-	movl	$0, %eax
-	setz	%al
-3:
-	RET
-	ret
-4:
-	xorl	%eax, %eax
-	jmp	3b
-END(rw_tryenter)
-
 #endif	/* LOCKDEBUG */
 
 /*

Index: src/sys/arch/i386/i386/genassym.cf
diff -u src/sys/arch/i386/i386/genassym.cf:1.117.2.1 src/sys/arch/i386/i386/genassym.cf:1.117.2.2
--- src/sys/arch/i386/i386/genassym.cf:1.117.2.1	Fri Jan 17 21:47:25 2020
+++ src/sys/arch/i386/i386/genassym.cf	Sun Jan 19 21:08:29 2020
@@ -1,4 +1,4 @@
-#	$NetBSD: genassym.cf,v 1.117.2.1 2020/01/17 21:47:25 ad Exp $
+#	$NetBSD: genassym.cf,v 1.117.2.2 2020/01/19 21:08:29 ad Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -79,7 +79,6 @@ include <sys/resourcevar.h>
 include <sys/device.h>
 include <sys/mbuf.h>
 include <sys/mutex.h>
-include <sys/rwlock.h>
 include <sys/cpu.h>
 
 include <netinet/in.h>
@@ -354,15 +353,6 @@ define	MTX_IPL			offsetof(struct kmutex,
 define	MTX_LOCK		offsetof(struct kmutex, mtx_lock)
 define	MTX_OWNER		offsetof(struct kmutex, u.mtxa_owner)
 
-define	RW_OWNER		offsetof(struct krwlock, rw_owner)
-define	RW_WRITE_LOCKED		RW_WRITE_LOCKED
-define	RW_WRITE_WANTED		RW_WRITE_WANTED
-define	RW_READ_INCR		RW_READ_INCR
-define	RW_HAS_WAITERS		RW_HAS_WAITERS
-define	RW_THREAD		RW_THREAD
-define	RW_READER		RW_READER
-define	RW_WRITER		RW_WRITER
-
 define	EV_COUNT		offsetof(struct evcnt, ev_count)
 
 define	OPTERON_MSR_PASSCODE	OPTERON_MSR_PASSCODE

Index: src/sys/arch/i386/i386/lock_stubs.S
diff -u src/sys/arch/i386/i386/lock_stubs.S:1.32 src/sys/arch/i386/i386/lock_stubs.S:1.32.2.1
--- src/sys/arch/i386/i386/lock_stubs.S:1.32	Sun Dec  8 20:00:56 2019
+++ src/sys/arch/i386/i386/lock_stubs.S	Sun Jan 19 21:08:29 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: lock_stubs.S,v 1.32 2019/12/08 20:00:56 ad Exp $	*/
+/*	$NetBSD: lock_stubs.S,v 1.32.2.1 2020/01/19 21:08:29 ad Exp $	*/
 
 /*-
  * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: lock_stubs.S,v 1.32 2019/12/08 20:00:56 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lock_stubs.S,v 1.32.2.1 2020/01/19 21:08:29 ad Exp $");
 
 #include "opt_lockdebug.h"
 
@@ -97,132 +97,6 @@ ENTRY(mutex_exit)
 	jmp	_C_LABEL(mutex_vector_exit)
 END(mutex_exit)
 
-/*
- * void	rw_enter(krwlock_t *rwl, krw_t op);
- *
- *	Acquire one hold on a RW lock.
- */
-ENTRY(rw_enter)
-	movl	4(%esp), %edx
-	cmpl	$RW_READER, 8(%esp)
-	jne	2f
-
-	/*
-	 * Reader
-	 */
-	movl	(%edx), %eax
-0:
-	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	3f
-	leal	RW_READ_INCR(%eax), %ecx
-	LOCK(2)
-	cmpxchgl %ecx, (%edx)
-	jnz	1f
-	RET(2)
-1:
-	jmp	0b
-
-	/*
-	 * Writer
-	 */
-2:	xorl	%eax, %eax
-	movl	%fs:CPU_INFO_CURLWP(%eax), %ecx
-	orl	$RW_WRITE_LOCKED, %ecx
-	LOCK(3)
-	cmpxchgl %ecx, (%edx)
-	jnz	3f
-	RET(3)
-3:
-	jmp	_C_LABEL(rw_vector_enter)
-END(rw_enter)
-
-/*
- * void	rw_exit(krwlock_t *rwl);
- *
- *	Release one hold on a RW lock.
- */
-ENTRY(rw_exit)
-	movl	4(%esp), %edx
-	movl	(%edx), %eax
-	testb	$RW_WRITE_LOCKED, %al
-	jnz	2f
-
-	/*
-	 * Reader
-	 */
-0:	testb	$RW_HAS_WAITERS, %al
-	jnz	3f
-	cmpl	$RW_READ_INCR, %eax
-	jb	3f
-	leal	-RW_READ_INCR(%eax), %ecx
-	LOCK(4)
-	cmpxchgl %ecx, (%edx)
-	jnz	1f
-	ret
-1:
-	jmp	0b
-
-	/*
-	 * Writer
-	 */
-2:	leal	-RW_WRITE_LOCKED(%eax), %ecx
-	subl	CPUVAR(CURLWP), %ecx
-	jnz	3f
-	LOCK(5)
-	cmpxchgl %ecx, (%edx)
-	jnz	3f
-	ret
-
-	/*
-	 * Slow path.
-	 */
-3:	jmp	_C_LABEL(rw_vector_exit)
-END(rw_exit)
-
-/*
- * int	rw_tryenter(krwlock_t *rwl, krw_t op);
- *
- *	Try to acquire one hold on a RW lock.
- */
-ENTRY(rw_tryenter)
-	movl	4(%esp), %edx
-	cmpl	$RW_READER, 8(%esp)
-	jne	2f
-
-	/*
-	 * Reader
-	 */
-	movl	(%edx), %eax
-0:
-	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
-	jnz	4f
-	leal	RW_READ_INCR(%eax), %ecx
-	LOCK(12)
-	cmpxchgl %ecx, (%edx)
-	jnz	1f
-	movl	%edx, %eax	/* nonzero */
-	RET(4)
-1:
-	jmp	0b
-
-	/*
-	 * Writer
-	 */
-2:
-	xorl	%eax, %eax
-	movl	%fs:CPU_INFO_CURLWP(%eax), %ecx
-	orl	$RW_WRITE_LOCKED, %ecx
-	LOCK(13)
-	cmpxchgl %ecx, (%edx)
-	movl	$0, %eax
-	setz	%al
-3:
-	RET(5)
-4:
-	xorl	%eax, %eax
-	jmp	3b
-END(rw_tryenter)
-
 #ifndef XENPV
 
 /*

Index: src/sys/kern/kern_rwlock.c
diff -u src/sys/kern/kern_rwlock.c:1.59.2.2 src/sys/kern/kern_rwlock.c:1.59.2.3
--- src/sys/kern/kern_rwlock.c:1.59.2.2	Sun Jan 19 19:44:34 2020
+++ src/sys/kern/kern_rwlock.c	Sun Jan 19 21:08:29 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_rwlock.c,v 1.59.2.2 2020/01/19 19:44:34 ad Exp $	*/
+/*	$NetBSD: kern_rwlock.c,v 1.59.2.3 2020/01/19 21:08:29 ad Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
@@ -36,10 +36,23 @@
  *
  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
  *	Richard McDougall.
+ *
+ * The NetBSD implementation is different from that described in the book,
+ * in that the locks are adaptive.  Lock waiters spin wait while the lock
+ * holders are on CPU (if the holds can be tracked: up to N per-thread).
+ *
+ * While spin waiting, threads compete for the lock without the assistance
+ * of turnstiles.  If a lock holder sleeps for any reason, the lock waiters
+ * will also sleep in response and at that point turnstiles, priority
+ * inheritance and strong efforts at ensuring fairness come into play.
+ *
+ * The adaptive behaviour is controlled by the RW_SPIN flag bit, which is
+ * cleared by a lock owner that is going off the CPU, and set again by the
+ * lock owner that releases the last hold on the lock.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.59.2.2 2020/01/19 19:44:34 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.59.2.3 2020/01/19 21:08:29 ad Exp $");
 
 #include "opt_lockdebug.h"
 
@@ -59,8 +72,6 @@ __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.
 
 #include <dev/lockstat.h>
 
-#include <machine/rwlock.h>
-
 /*
  * LOCKDEBUG
  */
@@ -104,19 +115,6 @@ do {						\
 #define	RW_MEMBAR_PRODUCER()		membar_producer()
 #endif
 
-/*
- * For platforms that do not provide stubs, or for the LOCKDEBUG case.
- */
-#ifdef LOCKDEBUG
-#undef	__HAVE_RW_STUBS
-#endif
-
-#ifndef __HAVE_RW_STUBS
-__strong_alias(rw_enter,rw_vector_enter);
-__strong_alias(rw_exit,rw_vector_exit);
-__strong_alias(rw_tryenter,rw_vector_tryenter);
-#endif
-
 static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
 static void	rw_dump(const volatile void *, lockop_printer_t);
 static lwp_t	*rw_owner(wchan_t);
@@ -149,6 +147,22 @@ rw_cas(krwlock_t *rw, uintptr_t o, uintp
 }
 
 /*
+ * rw_and:
+ *
+ *	Do an atomic AND on the lock word.
+ */
+static inline void
+rw_and(krwlock_t *rw, uintptr_t m)
+{
+
+#ifdef _LP64
+	atomic_and_64(&rw->rw_owner, m);
+#else
+	atomic_and_32(&rw->rw_owner, m);
+#endif
+}
+
+/*
  * rw_swap:
  *
  *	Do an atomic swap of the lock word.  This is used only when it's
@@ -167,6 +181,75 @@ rw_swap(krwlock_t *rw, uintptr_t o, uint
 }
 
 /*
+ * rw_hold_remember:
+ *
+ *	Helper - when acquiring a lock, record the new hold.
+ */
+static inline uintptr_t
+rw_hold_remember(krwlock_t *rw, lwp_t *l)
+{
+	int i;
+
+	KASSERT(kpreempt_disabled());
+
+	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
+		if (__predict_true(l->l_rwlocks[i] == NULL)) {
+			l->l_rwlocks[i] = rw;
+			/*
+			 * Clear the write wanted flag on every acquire to
+			 * give readers a chance once again.
+			 */
+			return ~RW_WRITE_WANTED;
+		}
+	}
+
+	/*
+	 * Nowhere to track the hold so we lose: temporarily disable
+	 * spinning on the lock.
+	 */
+	return ~(RW_WRITE_WANTED | RW_SPIN);
+}
+
+/*
+ * rw_hold_forget:
+ *
+ *	Helper - when releasing a lock, stop tracking the hold.
+ */
+static inline void
+rw_hold_forget(krwlock_t *rw, lwp_t *l)
+{
+	int i;
+
+	KASSERT(kpreempt_disabled());
+
+	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
+		if (__predict_true(l->l_rwlocks[i] == rw)) {
+			l->l_rwlocks[i] = NULL;
+			return;
+		}
+	}
+}
+
+/*
+ * rw_switch:
+ *
+ *	Called by mi_switch() to indicate that an LWP is going off the CPU.
+ */
+void
+rw_switch(void)
+{
+	lwp_t *l = curlwp;
+	int i;
+
+	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
+		if (l->l_rwlocks[i] != NULL) {
+			rw_and(l->l_rwlocks[i], ~RW_SPIN);
+			/* Leave in place for exit to clear. */
+		}
+	}
+}
+
+/*
  * rw_dump:
  *
  *	Dump the contents of a rwlock structure.
@@ -207,9 +290,9 @@ _rw_init(krwlock_t *rw, uintptr_t return
 {
 
 	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
-		rw->rw_owner = 0;
+		rw->rw_owner = RW_SPIN;
 	else
-		rw->rw_owner = RW_NODEBUG;
+		rw->rw_owner = RW_SPIN | RW_NODEBUG;
 }
 
 void
@@ -228,53 +311,18 @@ void
 rw_destroy(krwlock_t *rw)
 {
 
-	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
+	RW_ASSERT(rw, (rw->rw_owner & ~(RW_NODEBUG | RW_SPIN)) == 0);
 	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
 }
 
 /*
- * rw_oncpu:
- *
- *	Return true if an rwlock owner is running on a CPU in the system.
- *	If the target is waiting on the kernel big lock, then we must
- *	release it.  This is necessary to avoid deadlock.
- */
-static bool
-rw_oncpu(uintptr_t owner)
-{
-#ifdef MULTIPROCESSOR
-	struct cpu_info *ci;
-	lwp_t *l;
-
-	KASSERT(kpreempt_disabled());
-
-	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
-		return false;
-	}
-
-	/*
-	 * See lwp_dtor() why dereference of the LWP pointer is safe.
-	 * We must have kernel preemption disabled for that.
-	 */
-	l = (lwp_t *)(owner & RW_THREAD);
-	ci = l->l_cpu;
-
-	if (ci && ci->ci_curlwp == l) {
-		/* Target is running; do we need to block? */
-		return (ci->ci_biglock_wanted != l);
-	}
-#endif
-	/* Not running.  It may be safe to block now. */
-	return false;
-}
-
-/*
  * rw_vector_enter:
  *
- *	Acquire a rwlock.
+ *	The slow path for acquiring a rwlock, that considers all conditions.
+ *	Marked __noinline to prevent the compiler pulling it into rw_enter().
  */
-void
-rw_vector_enter(krwlock_t *rw, const krw_t op)
+static void __noinline
+rw_vector_enter(krwlock_t *rw, const krw_t op, uintptr_t mask, uintptr_t ra)
 {
 	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
 	turnstile_t *ts;
@@ -291,6 +339,7 @@ rw_vector_enter(krwlock_t *rw, const krw
 
 	RW_ASSERT(rw, !cpu_intr_p());
 	RW_ASSERT(rw, curthread != 0);
+	RW_ASSERT(rw, kpreempt_disabled());
 	RW_WANTLOCK(rw, op);
 
 	if (panicstr == NULL) {
@@ -322,15 +371,13 @@ rw_vector_enter(krwlock_t *rw, const krw
 
 	LOCKSTAT_ENTER(lsflag);
 
-	KPREEMPT_DISABLE(curlwp);
 	for (owner = rw->rw_owner;;) {
 		/*
 		 * Read the lock owner field.  If the need-to-wait
 		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
-			next = rw_cas(rw, owner, (owner + incr) &
-			    ~RW_WRITE_WANTED);
+			next = rw_cas(rw, owner, (owner + incr) & mask);
 			if (__predict_true(next == owner)) {
 				/* Got it! */
 				RW_MEMBAR_ENTER();
@@ -348,11 +395,36 @@ rw_vector_enter(krwlock_t *rw, const krw
 			rw_abort(__func__, __LINE__, rw,
 			    "locking against myself");
 		}
+
 		/*
-		 * If the lock owner is running on another CPU, and
-		 * there are no existing waiters, then spin.
+		 * If the lock owner is running on another CPU, and there
+		 * are no existing waiters, then spin.  Notes:
+		 *
+		 * 1) If an LWP on this CPU (possibly curlwp, or an LWP that
+		 * curlwp has interrupted) holds kernel_lock, we can't spin
+		 * without a deadlock.  The CPU that holds the rwlock may be
+		 * blocked trying to acquire kernel_lock, or there may be an
+		 * unseen chain of dependent locks.  To defeat the potential
+		 * deadlock, this LWP needs to sleep (and thereby directly
+		 * drop the kernel_lock, or permit the interrupted LWP that
+		 * holds kernel_lock to complete its work).
+		 *
+		 * 2) If trying to acquire a write lock, and the lock is
+		 * currently read held, after a brief wait set the write
+		 * wanted bit to block out new readers and try to avoid
+		 * starvation.  When the hold is acquired, we'll clear the
+		 * WRITE_WANTED flag to give readers a chance again.  With
+		 * luck this should nudge things in the direction of
+		 * interleaving readers and writers when there is high
+		 * contention.
+		 *
+		 * 3) The spin wait can't be done in soft interrupt context,
+		 * because a lock holder could be pinned down underneath the
+		 * soft interrupt LWP (i.e. curlwp) on the same CPU.  For
+		 * the lock holder to make progress and release the lock,
+		 * the soft interrupt needs to sleep.
 		 */
-		if (rw_oncpu(owner)) {
+		if ((owner & RW_SPIN) != 0 && !cpu_softintr_p()) {
 			LOCKSTAT_START_TIMER(lsflag, spintime);
 			u_int count = SPINLOCK_BACKOFF_MIN;
 			do {
@@ -360,7 +432,19 @@ rw_vector_enter(krwlock_t *rw, const krw
 				SPINLOCK_BACKOFF(count);
 				KPREEMPT_DISABLE(curlwp);
 				owner = rw->rw_owner;
-			} while (rw_oncpu(owner));
+				if ((owner & need_wait) == 0)
+					break;
+				if (count != SPINLOCK_BACKOFF_MAX)
+					continue;
+				if (curcpu()->ci_biglock_count != 0)
+					break;
+				if (op == RW_WRITER &&
+				    (owner & RW_WRITE_LOCKED) == 0 &&
+				    (owner & RW_WRITE_WANTED) == 0) {
+					(void)rw_cas(rw, owner,
+					    owner | RW_WRITE_WANTED);
+				}
+			} while ((owner & RW_SPIN) != 0);
 			LOCKSTAT_STOP_TIMER(lsflag, spintime);
 			LOCKSTAT_COUNT(spincnt, 1);
 			if ((owner & need_wait) == 0)
@@ -374,17 +458,18 @@ rw_vector_enter(krwlock_t *rw, const krw
 
 		ts = turnstile_lookup(rw);
 
 		/*
-		 * Mark the rwlock as having waiters.  If the set fails,
-		 * then we may not need to sleep and should spin again.
-		 * Reload rw_owner because turnstile_lookup() may have
-		 * spun on the turnstile chain lock.
+		 * Mark the rwlock as having waiters, and disable spinning.
+		 * If the set fails, then we may not need to sleep and
+		 * should spin again.  Reload rw_owner now that we own
+		 * the turnstile chain lock.
 		 */
 		owner = rw->rw_owner;
-		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
+		if ((owner & need_wait) == 0 ||
+		    ((owner & RW_SPIN) != 0 && !cpu_softintr_p())) {
 			turnstile_exit(rw);
 			continue;
 		}
-		next = rw_cas(rw, owner, owner | set_wait);
+		next = rw_cas(rw, owner, (owner | set_wait) & ~RW_SPIN);
 		if (__predict_false(next != owner)) {
 			turnstile_exit(rw);
 			owner = next;
@@ -409,11 +494,9 @@ rw_vector_enter(krwlock_t *rw, const krw
 
 	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
 	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
-	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
-	    (uintptr_t)__builtin_return_address(0)));
+	    (l->l_rwcallsite != 0 ? l->l_rwcallsite : ra));
 	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
-	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
-	    (uintptr_t)__builtin_return_address(0)));
+	    (l->l_rwcallsite != 0 ? l->l_rwcallsite : ra));
 	LOCKSTAT_EXIT(lsflag);
 
 	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
@@ -422,11 +505,70 @@ rw_vector_enter(krwlock_t *rw, const krw
 }
 
 /*
- * rw_vector_exit:
+ * rw_enter:
  *
- *	Release a rwlock.
+ *	The fast path for acquiring a lock that considers only the
+ *	uncontended case.  Falls back to rw_vector_enter().
  */
 void
+rw_enter(krwlock_t *rw, const krw_t op)
+{
+	uintptr_t owner, incr, need_wait, curthread, next, mask;
+	lwp_t *l;
+
+	l = curlwp;
+	curthread = (uintptr_t)l;
+
+	RW_ASSERT(rw, !cpu_intr_p());
+	RW_ASSERT(rw, curthread != 0);
+	RW_WANTLOCK(rw, op);
+
+	KPREEMPT_DISABLE(l);
+	mask = rw_hold_remember(rw, l);
+
+	/*
+	 * We play a slight trick here.  If we're a reader, we want to
+	 * increment the read count.  If we're a writer, we want to
+	 * set the owner field and the WRITE_LOCKED bit.
+	 *
+	 * In the latter case, we expect those bits to be zero,
+	 * therefore we can use an add operation to set them, which
+	 * means an add operation for both cases.
+	 */
+	if (__predict_true(op == RW_READER)) {
+		incr = RW_READ_INCR;
+		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
+	} else {
+		RW_ASSERT(rw, op == RW_WRITER);
+		incr = curthread | RW_WRITE_LOCKED;
+		need_wait = RW_WRITE_LOCKED | RW_THREAD;
+	}
+
+	/*
+	 * Read the lock owner field.  If the need-to-wait
+	 * indicator is clear, then try to acquire the lock.
+	 */
+	owner = rw->rw_owner;
+	if ((owner & need_wait) == 0) {
+		next = rw_cas(rw, owner, (owner + incr) & mask);
+		if (__predict_true(next == owner)) {
+			/* Got it! */
+			KPREEMPT_ENABLE(l);
+			RW_MEMBAR_ENTER();
+			return;
+		}
+	}
+
+	rw_vector_enter(rw, op, mask, (uintptr_t)__builtin_return_address(0));
+}
+
+/*
+ * rw_vector_exit:
+ *
+ *	The slow path for releasing a rwlock, that considers all conditions.
+ *	Marked __noinline to prevent the compiler pulling it into rw_enter().
+ */
+static void __noinline
 rw_vector_exit(krwlock_t *rw)
 {
 	uintptr_t curthread, owner, decr, newown, next;
@@ -437,6 +579,7 @@ rw_vector_exit(krwlock_t *rw)
 	l = curlwp;
 	curthread = (uintptr_t)l;
 	RW_ASSERT(rw, curthread != 0);
+	RW_ASSERT(rw, kpreempt_disabled());
 
 	/*
 	 * Again, we use a trick.  Since we used an add operation to
@@ -465,9 +608,15 @@ rw_vector_exit(krwlock_t *rw)
 		newown = (owner - decr);
 		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
 			break;
+		/* Want spinning enabled if lock is becoming free. */
+		if ((newown & RW_THREAD) == 0)
+			newown |= RW_SPIN;
 		next = rw_cas(rw, owner, newown);
-		if (__predict_true(next == owner))
+		if (__predict_true(next == owner)) {
+			rw_hold_forget(rw, l);
+			kpreempt_enable();
 			return;
+		}
 		owner = next;
 	}
 
@@ -507,12 +656,14 @@ rw_vector_exit(krwlock_t *rw)
 			if (wcnt > 1)
 				newown |= RW_WRITE_WANTED;
 			rw_swap(rw, owner, newown);
+			rw_hold_forget(rw, l);
 			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
 		} else {
 			/* Wake all writers and let them fight it out. */
 			newown = owner & RW_NODEBUG;
 			newown |= RW_WRITE_WANTED;
 			rw_swap(rw, owner, newown);
+			rw_hold_forget(rw, l);
 			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
 		}
 	} else {
@@ -530,19 +681,73 @@ rw_vector_exit(krwlock_t *rw)
 
 		/* Wake up all sleeping readers. */
 		rw_swap(rw, owner, newown);
+		rw_hold_forget(rw, l);
 		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
 	}
+	kpreempt_enable();
+}
+
+/*
+ * rw_exit:
+ *
+ *	The fast path for releasing a lock that considers only the
+ *	uncontended case.  Falls back to rw_vector_exit().
+ */
+void
+rw_exit(krwlock_t *rw)
+{
+	uintptr_t curthread, owner, decr, newown, next;
+	lwp_t *l;
+
+	l = curlwp;
+	curthread = (uintptr_t)l;
+	RW_ASSERT(rw, curthread != 0);
+
+	/*
+	 * Again, we use a trick.  Since we used an add operation to
+	 * set the required lock bits, we can use a subtract to clear
+	 * them, which makes the read-release and write-release path
+	 * the same.
+	 */
+	owner = rw->rw_owner;
+	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
+		RW_UNLOCKED(rw, RW_WRITER);
+		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
+		decr = curthread | RW_WRITE_LOCKED;
+	} else {
+		RW_UNLOCKED(rw, RW_READER);
+		RW_ASSERT(rw, RW_COUNT(rw) != 0);
+		decr = RW_READ_INCR;
+	}
+
+	/* Now try to release it. */
+	RW_MEMBAR_EXIT();
+	KPREEMPT_DISABLE(l);
+	newown = (owner - decr);
+	if (__predict_true((newown & (RW_THREAD | RW_HAS_WAITERS)) !=
+	    RW_HAS_WAITERS)) {
+		/* Want spinning (re-)enabled if lock is becoming free. */
+		if ((newown & RW_THREAD) == 0)
+			newown |= RW_SPIN;
+		next = rw_cas(rw, owner, newown);
+		if (__predict_true(next == owner)) {
+			rw_hold_forget(rw, l);
+			KPREEMPT_ENABLE(l);
+			return;
+		}
+	}
+	rw_vector_exit(rw);
 }
 
 /*
- * rw_vector_tryenter:
+ * rw_tryenter:
  *
  *	Try to acquire a rwlock.
  */
 int
-rw_vector_tryenter(krwlock_t *rw, const krw_t op)
+rw_tryenter(krwlock_t *rw, const krw_t op)
 {
-	uintptr_t curthread, owner, incr, need_wait, next;
+	uintptr_t curthread, owner, incr, need_wait, next, mask;
 	lwp_t *l;
 
 	l = curlwp;
@@ -550,6 +755,9 @@ rw_vector_tryenter(krwlock_t *rw, const
 
 	RW_ASSERT(rw, curthread != 0);
 
+	KPREEMPT_DISABLE(l);
+	mask = rw_hold_remember(rw, l);
+
 	if (op == RW_READER) {
 		incr = RW_READ_INCR;
 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
@@ -560,9 +768,12 @@ rw_vector_tryenter(krwlock_t *rw, const
 	}
 
 	for (owner = rw->rw_owner;; owner = next) {
-		if (__predict_false((owner & need_wait) != 0))
+		if (__predict_false((owner & need_wait) != 0)) {
+			rw_hold_forget(rw, l);
+			KPREEMPT_ENABLE(l);
 			return 0;
-		next = rw_cas(rw, owner, owner + incr);
+		}
+		next = rw_cas(rw, owner, (owner + incr) & mask);
 		if (__predict_true(next == owner)) {
 			/* Got it! */
 			break;
@@ -574,6 +785,7 @@ rw_vector_tryenter(krwlock_t *rw, const
 	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
 	    (op == RW_READER && RW_COUNT(rw) != 0));
 
+	KPREEMPT_ENABLE(l);
 	RW_MEMBAR_ENTER();
 	return 1;
 }
@@ -611,7 +823,7 @@ rw_downgrade(krwlock_t *rw)
 	 * waiters.
 	 */
 	if ((owner & RW_HAS_WAITERS) == 0) {
-		newown = (owner & RW_NODEBUG);
+		newown = (owner & RW_NODEBUG) | RW_SPIN;
 		next = rw_cas(rw, owner, newown + RW_READ_INCR);
 		if (__predict_true(next == owner)) {
 			RW_LOCKED(rw, RW_READER);
@@ -638,7 +850,8 @@ rw_downgrade(krwlock_t *rw)
 		/*
 		 * If there are no readers, just preserve the
 		 * waiters bits, swap us down to one read hold and
-		 * return.
+		 * return.  Don't set the spin bit as nobody's
+		 * running yet.
 		 */
 		RW_ASSERT(rw, wcnt != 0);
 		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
@@ -656,7 +869,8 @@ rw_downgrade(krwlock_t *rw)
 		 * Give the lock to all blocked readers.  We may
 		 * retain one read hold if downgrading.  If there is
 		 * a writer waiting, new readers will be blocked
-		 * out.
+		 * out.  Don't set the spin bit as nobody's running
+		 * yet.
 		 */
 		newown = owner & RW_NODEBUG;
 		newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
@@ -787,3 +1001,18 @@ rw_owner(wchan_t obj)
 
 	return (void *)(owner & RW_THREAD);
 }
+
+/*
+ * rw_owner_running:
+ *
+ *	Return true if a RW lock is unheld, or held and the owner is running
+ *	on a CPU.  For the pagedaemon only - do not document or use in other
+ *	code.
+ */
+bool
+rw_owner_running(const krwlock_t *rw)
+{
+	uintptr_t owner = rw->rw_owner;
+
+	return (owner & RW_THREAD) == 0 || (owner & RW_SPIN) != 0;
+}

Index: src/sys/kern/kern_synch.c
diff -u src/sys/kern/kern_synch.c:1.334.2.1 src/sys/kern/kern_synch.c:1.334.2.2
--- src/sys/kern/kern_synch.c:1.334.2.1	Fri Jan 17 21:47:35 2020
+++ src/sys/kern/kern_synch.c	Sun Jan 19 21:08:29 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_synch.c,v 1.334.2.1 2020/01/17 21:47:35 ad Exp $	*/
+/*	$NetBSD: kern_synch.c,v 1.334.2.2 2020/01/19 21:08:29 ad Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019
@@ -69,7 +69,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.334.2.1 2020/01/17 21:47:35 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.334.2.2 2020/01/19 21:08:29 ad Exp $");
 
 #include "opt_kstack.h"
 #include "opt_dtrace.h"
@@ -83,6 +83,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_synch.c
 #include <sys/cpu.h>
 #include <sys/pserialize.h>
 #include <sys/resourcevar.h>
+#include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/syscall_stats.h>
 #include <sys/sleepq.h>
@@ -657,6 +658,9 @@ mi_switch(lwp_t *l)
 	/* We're down to only one lock, so do debug checks. */
 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
 
+	/* Disable spinning on any R/W locks that we hold. */
+	rw_switch();
+
 	/* Count the context switch. */
 	CPU_COUNT(CPU_COUNT_NSWTCH, 1);
 	l->l_ncsw++;

Index: src/sys/sys/lwp.h
diff -u src/sys/sys/lwp.h:1.192.2.1 src/sys/sys/lwp.h:1.192.2.2
--- src/sys/sys/lwp.h:1.192.2.1	Fri Jan 17 21:47:37 2020
+++ src/sys/sys/lwp.h	Sun Jan 19 21:08:30 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: lwp.h,v 1.192.2.1 2020/01/17 21:47:37 ad Exp $	*/
+/*	$NetBSD: lwp.h,v 1.192.2.2 2020/01/19 21:08:30 ad Exp $	*/
 
 /*
  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019
@@ -186,6 +186,7 @@ struct lwp {
 	u_short		l_exlocks;	/* !: lockdebug: excl. locks held */
 	u_short		l_psrefs;	/* !: count of psref held */
 	u_short		l_blcnt;	/* !: count of kernel_lock held */
+	struct krwlock	*l_rwlocks[4];	/* !: tracks first N held rwlocks */
 	int		l_nopreempt;	/* !: don't preempt me! */
 	u_int		l_dopreempt;	/* s: kernel preemption pending */
 	int		l_pflag;	/* !: LWP private flags */
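[Editor's note: nothing changes for rwlock(9) callers; the spin-or-sleep
decision and the per-LWP l_rwlocks[] bookkeeping happen entirely inside
rw_enter(), rw_exit() and rw_tryenter().  For reference, a sketch of
typical usage follows.  The "struct db" consumer and the db_* functions
are hypothetical; only the rwlock(9) calls are real API.]

#include <sys/rwlock.h>

struct db {
	krwlock_t	d_lock;		/* guards d_value */
	int		d_value;
};

static void
db_init(struct db *d)
{

	rw_init(&d->d_lock);
	d->d_value = 0;
}

static int
db_get(struct db *d)
{
	int v;

	/* May spin (holders on CPU) or sleep on a turnstile (holder off). */
	rw_enter(&d->d_lock, RW_READER);
	v = d->d_value;
	rw_exit(&d->d_lock);
	return v;
}

static int
db_set_if_idle(struct db *d, int v)
{

	/* rw_tryenter() fails immediately rather than spinning/sleeping. */
	if (!rw_tryenter(&d->d_lock, RW_WRITER))
		return 0;
	d->d_value = v;
	rw_exit(&d->d_lock);
	return 1;
}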