> Date: Fri, 6 Oct 2017 02:58:20 +0000 > From: Taylor R Campbell <campbell+netbsd-tech-k...@mumble.net> > > I wonder whether the open-coded version would do better if it > unconditionally loaded the percpu:
For concreteness, in case you'd like to test this, I've attached an updated patch that yields the conditional move sequence I quoted.
Index: sys/sys/psref.h =================================================================== RCS file: /cvsroot/src/sys/sys/psref.h,v retrieving revision 1.2 diff -p -u -r1.2 psref.h --- sys/sys/psref.h 16 Dec 2016 20:12:11 -0000 1.2 +++ sys/sys/psref.h 6 Oct 2017 03:08:58 -0000 @@ -69,7 +69,8 @@ struct psref_target { * written only on the local CPU. */ struct psref { - LIST_ENTRY(psref) psref_entry; + struct psref *psref_next; + struct psref **psref_prevp; const struct psref_target *psref_target; struct lwp *psref_lwp; struct cpu_info *psref_cpu; Index: sys/kern/subr_psref.c =================================================================== RCS file: /cvsroot/src/sys/kern/subr_psref.c,v retrieving revision 1.7 diff -p -u -r1.7 subr_psref.c --- sys/kern/subr_psref.c 1 Jun 2017 02:45:13 -0000 1.7 +++ sys/kern/subr_psref.c 6 Oct 2017 03:08:58 -0000 @@ -78,8 +78,6 @@ __KERNEL_RCSID(0, "$NetBSD: subr_psref.c #include <sys/queue.h> #include <sys/xcall.h> -LIST_HEAD(psref_head, psref); - static bool _psref_held(const struct psref_target *, struct psref_class *, bool); @@ -103,7 +101,7 @@ struct psref_class { * Not exposed by the API. */ struct psref_cpu { - struct psref_head pcpu_head; + struct psref *pcpu_first; }; /* @@ -135,7 +133,7 @@ psref_cpu_drained_p(void *p, void *cooki const struct psref_cpu *pcpu = p; bool *retp = cookie; - if (!LIST_EMPTY(&pcpu->pcpu_head)) + if (pcpu->pcpu_first != NULL) *retp = false; } @@ -194,7 +192,9 @@ psref_check_duplication(struct psref_cpu bool found = false; struct psref *_psref; - LIST_FOREACH(_psref, &pcpu->pcpu_head, psref_entry) { + for (_psref = pcpu->pcpu_first; + _psref != NULL; + _psref = _psref->psref_next) { if (_psref == psref && _psref->psref_target == target) { found = true; @@ -250,7 +250,11 @@ psref_acquire(struct psref *psref, const #endif /* Record our reference. */ - LIST_INSERT_HEAD(&pcpu->pcpu_head, psref, psref_entry); + if ((psref->psref_next = pcpu->pcpu_first) != NULL) + psref->psref_next->psref_prevp = &psref->psref_next; + psref->psref_prevp = NULL; /* head of list */ + pcpu->pcpu_first = psref; + psref->psref_target = target; psref->psref_lwp = curlwp; psref->psref_cpu = curcpu(); @@ -273,6 +277,7 @@ void psref_release(struct psref *psref, const struct psref_target *target, struct psref_class *class) { + struct psref_cpu *pcpu; int s; KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() || @@ -297,12 +302,31 @@ psref_release(struct psref *psref, const /* * Block interrupts and remove the psref from the current CPU's - * list. No need to percpu_getref or get the head of the list, - * and the caller guarantees that we are bound to a CPU anyway - * (as does blocking interrupts). + * list. */ s = splraiseipl(class->prc_iplcookie); - LIST_REMOVE(psref, psref_entry); + if (psref->psref_next != NULL) + psref->psref_next->psref_prevp = psref->psref_prevp; + + /* + * If it's at the head of the list, we must modify the head + * explicitly. We can't use a back-pointer into the head + * because percpu_alloc may move the per-CPU objects around in + * memory. + */ + pcpu = percpu_getref(class->prc_percpu); + KASSERTMSG(psref->psref_prevp == NULL || *psref->psref_prevp == psref, + "psref %p prevp %p points at %p", + psref, psref->psref_prevp, *psref->psref_prevp); + KASSERTMSG(psref->psref_prevp != NULL || pcpu->pcpu_first == psref, + "psref %p marked as first but psref_cpu %p on %d first is %p", + psref, pcpu, cpu_index(curcpu()), pcpu->pcpu_first); + *(psref->psref_prevp ? psref->psref_prevp : &pcpu->pcpu_first) = + psref->psref_next; + percpu_putref(class->prc_percpu); + + psref->psref_prevp = (void *)1; /* poison */ + psref->psref_next = (void *)2; /* poison */ splx(s); /* If someone is waiting for users to drain, notify 'em. */ @@ -353,7 +377,11 @@ psref_copy(struct psref *pto, const stru pcpu = percpu_getref(class->prc_percpu); /* Record the new reference. */ - LIST_INSERT_HEAD(&pcpu->pcpu_head, pto, psref_entry); + if ((pto->psref_next = pcpu->pcpu_first) != NULL) + pto->psref_next->psref_prevp = &pto->psref_next; + pto->psref_prevp = NULL; /* head of list */ + pcpu->pcpu_first = pto; + pto->psref_target = pfrom->psref_target; pto->psref_lwp = curlwp; pto->psref_cpu = curcpu(); @@ -474,7 +502,9 @@ _psref_held(const struct psref_target *t pcpu = percpu_getref(class->prc_percpu); /* Search through all the references on this CPU. */ - LIST_FOREACH(psref, &pcpu->pcpu_head, psref_entry) { + for (psref = pcpu->pcpu_first; + psref != NULL; + psref = psref->psref_next) { /* Sanity-check the reference's CPU. */ KASSERTMSG((psref->psref_cpu == curcpu()), "passive reference transferred from CPU %u to CPU %u",