   Date: Mon, 1 Feb 2016 14:58:05 +0900
   From: Ryota Ozaki <ozak...@netbsd.org>

   On Sat, Jan 30, 2016 at 12:43 AM, Taylor R Campbell
   <riastr...@netbsd.org> wrote:
   > Date: Fri, 29 Jan 2016 17:39:13 +0900
   > From: Ryota Ozaki <ozak...@netbsd.org>
   >
   > I'm thinking of applying psref to the bridge member list,
   > which currently uses its own version of a similar mechanism
   > (psz + refcount).
   >
   > Nice!  That sounds like a good application to test, since most of the
   > work has been done already and it's mainly a matter of replacing a
   > refcount by a psref.

   I'm happy if there is a patch of psref.c against -current :)

Here you go, attached!  I even compile-tested this one.

   > int bound = curlwp->l_pflag & LP_BOUND;
   > curlwp->l_pflag |= LP_BOUND;
   > ...acquire and release psref...
   > curlwp->l_pflag ^= bound ^ LP_BOUND;
   >
   > [...]
   >
   > Someone more familiar with the inner workings of LWPs and the
   > scheduler should review this, though.  And maybe we should invent a
   > name lwp_bind/lwp_unbind for this pattern.

   I agree; open accesses to l_pflag don't look like a good idea.

Actually, I think it is fine.  There was only one exception I could
find in-tree, LP_VFORKWAIT, and it has been relegated to #if 0.  All
the other flags in l_pflag are used unlocked, and I believe the
scheduler and interrupt handlers will undo any changes they make --
LP_BOUND, LP_INTR, LP_OWEUPC, LP_RUNNING, LP_TIMEINTR.

That said, I think naming lwp_bind/lwp_unbind is still a good idea!
Maybe instead curlwp_bind/curlwp_unbind:

/* sys/lwp.h */

static inline int
curlwp_bind(void)
{
	int bound;

	bound = curlwp->l_pflag & LP_BOUND;
	curlwp->l_pflag |= LP_BOUND;

	return bound;
}

static inline void
curlwp_unbind(int bound)
{

	curlwp->l_pflag ^= bound ^ LP_BOUND;
}
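
For concreteness, here is roughly how I'd expect a caller like the
bridge member list to use this together with curlwp_bind/curlwp_unbind.
This is a minimal sketch only: bridge_psref_class, bif_psref, and the
surrounding pserialize usage are invented names for illustration, not
part of the patch below.

/*
 * Hypothetical lookup: find a bridge member under pserialize and pin
 * it with a passive reference before leaving the read section.  The
 * caller must already satisfy psref_acquire's requirements, e.g. by
 * having called curlwp_bind.
 */
static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *ifp,
    struct psref *psref)
{
	struct bridge_iflist *bif;
	int s;

	s = pserialize_read_enter();
	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
		if (bif->bif_ifp == ifp)
			break;
	}
	/* Pin the member before the read section ends. */
	if (bif != NULL &&
	    psref_acquire(psref, &bif->bif_psref, bridge_psref_class) != 0)
		bif = NULL;	/* target is draining: treat as gone */
	pserialize_read_exit(s);

	return bif;
}

A caller would then bracket the whole thing with curlwp_bind, so that a
short sleep (e.g. on an adaptive lock) is safe while the reference is
held:

	struct psref psref;
	int bound;

	bound = curlwp_bind();		/* stay on this CPU */
	bif = bridge_lookup_member_if(sc, ifp, &psref);
	if (bif != NULL) {
		/* ...use bif; may sleep briefly... */
		psref_release(&psref, &bif->bif_psref, bridge_psref_class);
	}
	curlwp_unbind(bound);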
Index: sys/arch/amd64/conf/ALL
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/conf/ALL,v
retrieving revision 1.33
diff -p -u -r1.33 ALL
--- sys/arch/amd64/conf/ALL	10 Nov 2015 13:01:41 -0000	1.33
+++ sys/arch/amd64/conf/ALL	2 Feb 2016 00:08:33 -0000
@@ -125,6 +125,7 @@ options 	SYSCALL_STATS	# per syscall cou
 options 	SYSCALL_TIMES	# per syscall times
 options 	SYSCALL_TIMES_HASCOUNTER	# use 'broken' rdtsc (soekris)
 options 	KDTRACE_HOOKS	# kernel DTrace hooks
+options 	PSREF_DEBUG	# debug passive references
 
 # Compatibility options
 #options 	COMPAT_NOMID	# NetBSD 0.8, 386BSD, and BSDI
Index: sys/arch/i386/conf/ALL
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/conf/ALL,v
retrieving revision 1.397
diff -p -u -r1.397 ALL
--- sys/arch/i386/conf/ALL	25 Oct 2015 22:48:23 -0000	1.397
+++ sys/arch/i386/conf/ALL	2 Feb 2016 00:08:34 -0000
@@ -125,6 +125,7 @@ options 	SYSCALL_STATS	# per syscall cou
 options 	SYSCALL_TIMES	# per syscall times
 options 	SYSCALL_TIMES_HASCOUNTER	# use 'broken' rdtsc (soekris)
 options 	KDTRACE_HOOKS	# kernel DTrace hooks
+options 	PSREF_DEBUG	# debug passive references
 
 # Compatibility options
 options 	COMPAT_NOMID	# NetBSD 0.8, 386BSD, and BSDI
Index: sys/conf/files
===================================================================
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.1152
diff -p -u -r1.1152 files
--- sys/conf/files	9 Dec 2015 18:25:32 -0000	1.1152
+++ sys/conf/files	2 Feb 2016 00:08:34 -0000
@@ -284,6 +284,7 @@ defparam opt_kgdb.h	KGDB_DEV KGDB_DEVNA
 defflag				LOCKDEBUG
 defflag				SYSCALL_DEBUG
 defflag	opt_kstack.h		KSTACK_CHECK_MAGIC
+defflag	opt_psref.h		PSREF_DEBUG
 
 # memory (ram) disk options
 #
Index: sys/kern/files.kern
===================================================================
RCS file: /cvsroot/src/sys/kern/files.kern,v
retrieving revision 1.10
diff -p -u -r1.10 files.kern
--- sys/kern/files.kern	3 Dec 2015 02:51:00 -0000	1.10
+++ sys/kern/files.kern	2 Feb 2016 00:08:35 -0000
@@ -125,6 +125,7 @@ file	kern/subr_pool.c		kern
 file	kern/subr_prf.c			kern
 file	kern/subr_prof.c		kern
 file	kern/subr_pserialize.c		kern
+file	kern/subr_psref.c		kern
 file	kern/subr_specificdata.c	kern
 file	kern/subr_tftproot.c		tftproot
 file	kern/subr_time.c		kern
Index: sys/kern/subr_psref.c
===================================================================
RCS file: sys/kern/subr_psref.c
diff -N sys/kern/subr_psref.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/kern/subr_psref.c	2 Feb 2016 00:08:35 -0000
@@ -0,0 +1,409 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2016 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Taylor R. Campbell.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Passive references
+ *
+ *	Passive references are references to objects that guarantee the
+ *	object will not be destroyed until the reference is released.
+ *
+ *	Passive references require no interprocessor synchronization to
+ *	acquire or release.  However, destroying the target of passive
+ *	references requires expensive interprocessor synchronization --
+ *	xcalls to determine on which CPUs the object is still in use.
+ *
+ *	Passive references may be held only on a single CPU and by a
+ *	single LWP.  They require the caller to allocate a little stack
+ *	space, a struct psref object.  Sleeping while a passive
+ *	reference is held is allowed, provided that the owner's LWP is
+ *	bound to a CPU -- e.g., the owner is a softint or a bound
+ *	kthread.  However, sleeping should be kept to a short duration,
+ *	e.g. sleeping on an adaptive lock.
+ *
+ *	Passive references serve as an intermediate stage between
+ *	reference counting and passive serialization (pserialize(9)):
+ *
+ *	- If you need references to transfer from CPU to CPU or LWP to
+ *	  LWP, or if you need long-term references, you must use
+ *	  reference counting, e.g. with atomic operations or locks,
+ *	  which incurs interprocessor synchronization for every use --
+ *	  cheaper than an xcall, but not scalable.
+ *
+ *	- If all users *guarantee* that they will not sleep, then it is
+ *	  not necessary to use passive references: you may as well just
+ *	  use the even cheaper pserialize(9), because you have
+ *	  satisfied the requirements of a pserialize read section.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/types.h>
+#include <sys/condvar.h>
+#include <sys/cpu.h>
+#include <sys/intr.h>
+#include <sys/kmem.h>
+#include <sys/lwp.h>
+#include <sys/mutex.h>
+#include <sys/percpu.h>
+#include <sys/psref.h>
+#include <sys/queue.h>
+#include <sys/xcall.h>
+
+LIST_HEAD(psref_head, psref);
+
+/*
+ * struct psref_class
+ *
+ *	Private global state for a class of passive reference targets.
+ *	Opaque to callers.
+ */
+struct psref_class {
+	kmutex_t		prc_lock;
+	kcondvar_t		prc_cv;
+	struct percpu		*prc_percpu; /* struct psref_cpu */
+	ipl_cookie_t		prc_iplcookie;
+};
+
+/*
+ * struct psref_cpu
+ *
+ *	Private per-CPU state for a class of passive reference targets.
+ *	Not exposed by the API.
+ */
+struct psref_cpu {
+	struct psref_head	pcpu_head;
+};
+
+/*
+ * psref_class_create(name, ipl)
+ *
+ *	Create a new passive reference class, with the given wchan name
+ *	and ipl.
+ */
+struct psref_class *
+psref_class_create(const char *name, int ipl)
+{
+	struct psref_class *class;
+
+	ASSERT_SLEEPABLE();
+
+	class = kmem_alloc(sizeof(*class), KM_SLEEP);
+	if (class == NULL)
+		goto fail0;
+
+	class->prc_percpu = percpu_alloc(sizeof(struct psref_cpu));
+	if (class->prc_percpu == NULL)
+		goto fail1;
+
+	mutex_init(&class->prc_lock, MUTEX_DEFAULT, ipl);
+	cv_init(&class->prc_cv, name);
+	class->prc_iplcookie = makeiplcookie(ipl);
+
+	return class;
+
+fail1:	kmem_free(class, sizeof(*class));
+fail0:	return NULL;
+}
+
+#ifdef DIAGNOSTIC
+static void
+psref_cpu_drained_p(void *p, void *cookie, struct cpu_info *ci __unused)
+{
+	struct psref_cpu *pcpu = p;
+	bool *retp = cookie;
+
+	if (!LIST_EMPTY(&pcpu->pcpu_head))
+		*retp = false;
+}
+
+static bool
+psref_class_drained_p(struct psref_class *prc)
+{
+	bool ret = true;
+
+	percpu_foreach(prc->prc_percpu, &psref_cpu_drained_p, &ret);
+
+	return ret;
+}
+#endif	/* DIAGNOSTIC */
+
+/*
+ * psref_class_destroy(class)
+ *
+ *	Destroy a passive reference class and free memory associated
+ *	with it.  All targets in this class must have been drained and
+ *	destroyed already.
+ */
+void
+psref_class_destroy(struct psref_class *class)
+{
+
+	KASSERT(psref_class_drained_p(class));
+
+	cv_destroy(&class->prc_cv);
+	mutex_destroy(&class->prc_lock);
+	percpu_free(class->prc_percpu, sizeof(struct psref_cpu));
+	kmem_free(class, sizeof(*class));
+}
+
+/*
+ * psref_target_init(target, class)
+ *
+ *	Initialize a passive reference target in the specified class.
+ *	The caller is responsible for issuing a membar_producer before
+ *	exposing a pointer to the target to other CPUs.
+ */
+void
+psref_target_init(struct psref_target *target, struct psref_class *class)
+{
+
+	target->prt_draining = false;
+#ifdef PSREF_DEBUG
+	target->prt_class = class;
+#endif
+}
+
+/*
+ * psref_target_destroy(target, class)
+ *
+ *	Destroy a passive reference target.  It must have previously
+ *	been drained.
+ */
+void
+psref_target_destroy(struct psref_target *target, struct psref_class *class)
+{
+
+	KASSERT(target->prt_draining);
+#ifdef PSREF_DEBUG
+	KASSERT(target->prt_class == class);
+	target->prt_class = NULL;
+#endif
+}
+
+/*
+ * psref_acquire(psref, target, class)
+ *
+ *	Try to acquire a passive reference to the specified target,
+ *	which must be in the specified class.  On success, returns
+ *	zero; on failure, returns a nonzero error code.  If the target
+ *	is draining, returns ENOENT.
+ *
+ *	The caller must guarantee that it will not switch CPUs before
+ *	releasing the passive reference, either by disabling
+ *	kpreemption and avoiding sleeps, or by being in a softint or in
+ *	an LWP bound to a CPU.
+ */
+int
+psref_acquire(struct psref *psref, struct psref_target *target,
+    struct psref_class *class)
+{
+	struct psref_cpu *pcpu;
+	int s, error;
+
+	KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() ||
+		ISSET(curlwp->l_pflag, LP_BOUND)),
+	    "passive references are CPU-local,"
+	    " but preemption is enabled and the caller is not"
+	    " in a softint or CPU-bound LWP");
+
+#ifdef PSREF_DEBUG
+	KASSERT(target->prt_class == class);
+#endif
+
+	/* Block interrupts and acquire the current CPU's reference list. */
+	s = splraiseipl(class->prc_iplcookie);
+	pcpu = percpu_getref(class->prc_percpu);
+
+	/* Is this target going away? */
+	if (__predict_false(target->prt_draining)) {
+		/* Yes: fail.  */
+		error = ENOENT;
+	} else {
+		/* No: record our reference.  */
+		LIST_INSERT_HEAD(&pcpu->pcpu_head, psref, psref_entry);
+		psref->psref_target = target;
+#ifdef PSREF_DEBUG
+		psref->psref_lwp = curlwp;
+		psref->psref_cpu = curcpu();
+#endif
+		error = 0;
+	}
+
+	/* Release the CPU list and restore interrupts. */
+	percpu_putref(class->prc_percpu);
+	splx(s);
+
+	return error;
+}
+
+/*
+ * psref_release(psref, target, class)
+ *
+ *	Release a passive reference to the specified target, which must
+ *	be in the specified class.
+ *
+ *	The caller must not have switched CPUs or LWPs since acquiring
+ *	the passive reference.
+ */
+void
+psref_release(struct psref *psref, struct psref_target *target,
+    struct psref_class *class)
+{
+	int s;
+
+	KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() ||
+		ISSET(curlwp->l_pflag, LP_BOUND)),
+	    "passive references are CPU-local,"
+	    " but preemption is enabled and the caller is not"
+	    " in a softint or CPU-bound LWP");
+
+	KASSERT(psref->psref_target == target);
+#ifdef PSREF_DEBUG
+	KASSERT(target->prt_class == class);
+	KASSERTMSG((psref->psref_lwp == curlwp),
+	    "passive reference transferred from lwp %p to lwp %p",
+	    psref->psref_lwp, curlwp);
+	KASSERTMSG((psref->psref_cpu == curcpu()),
+	    "passive reference transferred from CPU %u to CPU %u",
+	    cpu_index(psref->psref_cpu), cpu_index(curcpu()));
+#endif
+
+	/*
+	 * Block interrupts and remove the psref from the current CPU's
+	 * list.  No need to percpu_getref or get the head of the list,
+	 * and the caller guarantees that we are bound to a CPU anyway
+	 * (as does blocking interrupts).
+	 */
+	s = splraiseipl(class->prc_iplcookie);
+	LIST_REMOVE(psref, psref_entry);
+	splx(s);
+
+	/* If someone is waiting for users to drain, notify 'em.  */
+	if (__predict_false(target->prt_draining))
+		cv_broadcast(&class->prc_cv);
+}
+
+/*
+ * struct psreffed
+ *
+ *	Global state for draining a psref target.
+ */
+struct psreffed {
+	struct psref_class	*class;
+	struct psref_target	*target;
+	bool			ret;
+};
+
+static void
+psreffed_p_xc(void *cookie0, void *cookie1 __unused)
+{
+	struct psreffed *P = cookie0;
+	struct psref_class *class = P->class;
+	struct psref_target *target = P->target;
+	struct psref_cpu *pcpu;
+	struct psref *psref;
+	int s;
+
+	/* Block interrupts and acquire the current CPU's reference list. */
+	s = splraiseipl(class->prc_iplcookie);
+	pcpu = percpu_getref(class->prc_percpu);
+
+	/*
+	 * Check the CPU's reference list for any references to this
+	 * target.  This loop shouldn't take very long because any
+	 * single CPU should hold only a small number of references at
+	 * any given time unless there is a bug.
+	 */
+	LIST_FOREACH(psref, &pcpu->pcpu_head, psref_entry) {
+		if (psref->psref_target == target) {
+			/*
+			 * No need to lock anything here: every write
+			 * transitions from false to true, so as long
+			 * as any write goes through we're good.  No
+			 * need for a memory barrier because this is
+			 * read only after xc_wait, which has already
+			 * issued any necessary memory barriers.
+			 */
+			P->ret = true;
+			break;
+		}
+	}
+
+	/* Release the CPU list and restore interrupts. */
+	percpu_putref(class->prc_percpu);
+	splx(s);
+}
+
+static bool
+psreffed_p(struct psref_target *target, struct psref_class *class)
+{
+	struct psreffed P = {
+		.class = class,
+		.target = target,
+		.ret = false,
+	};
+
+	xc_wait(xc_broadcast(0, &psreffed_p_xc, &P, NULL));
+
+	return P.ret;
+}
+
+/*
+ * psref_target_drain(target, class)
+ *
+ *	Prevent new references to target and wait for existing ones to
+ *	drain.  May sleep.
+ */
+void
+psref_target_drain(struct psref_target *target, struct psref_class *class)
+{
+
+#ifdef PSREF_DEBUG
+	KASSERT(target->prt_class == class);
+#endif
+
+	ASSERT_SLEEPABLE();
+	KASSERT(!target->prt_draining);
+	target->prt_draining = true;
+
+	/* Wait until there are no more references on any CPU.  */
+	while (psreffed_p(target, class)) {
+		/*
+		 * This enter/wait/exit business looks wrong, but it is
+		 * both necessary, because psreffed_p performs a
+		 * low-priority xcall and hence cannot run while a
+		 * mutex is locked, and OK, because the wait is timed
+		 * -- explicit wakeups are only an optimization.
+		 */
+		mutex_enter(&class->prc_lock);
+		(void)cv_timedwait(&class->prc_cv, &class->prc_lock, 1);
+		mutex_exit(&class->prc_lock);
+	}
+}
Index: sys/sys/psref.h
===================================================================
RCS file: sys/sys/psref.h
diff -N sys/sys/psref.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/sys/psref.h	2 Feb 2016 00:08:36 -0000
@@ -0,0 +1,107 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2016 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Taylor R. Campbell.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_SYS_PSREF_H
+#define	_SYS_PSREF_H
+
+#include <sys/types.h>
+#include <sys/queue.h>
+
+/*
+ * PSREF_DEBUG
+ *
+ *	If defined, enable debugging of psrefs.  WARNING: This changes
+ *	the ABI by adding extra fields to struct psref_target and
+ *	struct psref, which are exposed to callers and embedded in
+ *	other structures.
+ */
+#ifdef _KERNEL_OPT
+#include "opt_psref.h"
+#endif
+
+struct cpu_info;
+struct lwp;
+
+struct psref;
+struct psref_class;
+struct psref_target;
+
+/*
+ * struct psref_target
+ *
+ *	Bookkeeping for an object to which users can acquire passive
+ *	references.  This is compact so that it can easily be embedded
+ *	into many multitudes of objects, e.g. IP packet flows.
+ *
+ *	prt_draining is false on initialization, and may be written
+ *	only once, to make it true, when someone with exclusive access
+ *	to it is about to drain the target.
+ */
+struct psref_target {
+	bool			prt_draining;
+#ifdef PSREF_DEBUG
+	struct psref_class	*prt_class;
+#endif
+};
+
+/*
+ * struct psref
+ *
+ *	Bookkeeping for a single passive reference.  There should only
+ *	be a few of these per CPU in the system at once, no matter how
+ *	many targets are stored, so these are a bit larger than struct
+ *	psref_target.  The contents of struct psref may be read and
+ *	written only on the local CPU.
+ */
+struct psref {
+	LIST_ENTRY(psref)	psref_entry;
+	struct psref_target	*psref_target;
+#ifdef PSREF_DEBUG
+	struct lwp		*psref_lwp;
+	struct cpu_info		*psref_cpu;
+#endif
+};
+
+struct psref_class *
+	psref_class_create(const char *, int);
+void	psref_class_destroy(struct psref_class *);
+
+void	psref_target_init(struct psref_target *, struct psref_class *);
+void	psref_target_destroy(struct psref_target *, struct psref_class *);
+
+int	psref_acquire(struct psref *, struct psref_target *,
+	    struct psref_class *);
+void	psref_release(struct psref *, struct psref_target *,
+	    struct psref_class *);
+
+void	psref_target_drain(struct psref_target *, struct psref_class *);
+
+#endif	/* _SYS_PSREF_H */
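
For completeness, the intended life cycle of a target, as I understand
it from the comments above.  Again a sketch only, with invented names
(frob, frob_class, frob_lock, frob_psz, frob_list); only the psref_*
calls come from the patch, and the pserialize/mutex plumbing is the
usual pattern a real user would already have:

	/* Once, at subsystem init. */
	frob_class = psref_class_create("frob", IPL_SOFTNET);

	/* Creation: initialize the target, then publish it. */
	psref_target_init(&f->f_target, frob_class);
	mutex_enter(&frob_lock);
	LIST_INSERT_HEAD(&frob_list, f, f_entry);
	mutex_exit(&frob_lock);

	/* Destruction: unlink it so no new lookups can find it... */
	mutex_enter(&frob_lock);
	LIST_REMOVE(f, f_entry);
	mutex_exit(&frob_lock);
	pserialize_perform(frob_psz);

	/* ...wait for existing passive references to drain... */
	psref_target_drain(&f->f_target, frob_class);

	/* ...and only then tear it down and free it. */
	psref_target_destroy(&f->f_target, frob_class);
	kmem_free(f, sizeof(*f));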