> Date: Fri, 6 Oct 2017 02:58:20 +0000
> From: Taylor R Campbell <campbell+netbsd-tech-k...@mumble.net>
> 
> I wonder whether the open-coded version would do better if it
> unconditionally loaded the percpu:

For concreteness, in case you'd like to test this, I've attached an
updated patch that yields the conditional move sequence I quoted.
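
In case it helps to see the idiom in isolation, here's a rough
userland sketch of the list discipline the patch adopts; the names
(node, head, insert_head, remove_node) are made up for illustration
and are not the kernel's:

#include <stddef.h>

struct node {
	struct node	*next;	/* next element in list, or NULL */
	struct node	**prevp;/* &prev->next, or NULL if at head */
};

struct head {
	struct node	*first;
};

static void
insert_head(struct head *h, struct node *n)
{

	if ((n->next = h->first) != NULL)
		n->next->prevp = &n->next;
	n->prevp = NULL;	/* not &h->first: the head may move */
	h->first = n;
}

static void
remove_node(struct head *h, struct node *n)
{

	if (n->next != NULL)
		n->next->prevp = n->prevp;
	/*
	 * Pick the store target as data rather than control flow,
	 * so the compiler can emit a conditional move instead of a
	 * branch: either the predecessor's next pointer or the
	 * freshly loaded list head.
	 */
	*(n->prevp ? n->prevp : &h->first) = n->next;
}

The point of the ternary in remove_node is that the store target is
computed as a value, which is what lets the compiler use cmov; and
prevp deliberately never points into the head itself, since in the
kernel case percpu_alloc may move the per-CPU objects around in
memory.
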
Index: sys/sys/psref.h
===================================================================
RCS file: /cvsroot/src/sys/sys/psref.h,v
retrieving revision 1.2
diff -p -u -r1.2 psref.h
--- sys/sys/psref.h     16 Dec 2016 20:12:11 -0000      1.2
+++ sys/sys/psref.h     6 Oct 2017 03:08:58 -0000
@@ -69,7 +69,8 @@ struct psref_target {
  *     written only on the local CPU.
  */
 struct psref {
-       LIST_ENTRY(psref)               psref_entry;
+       struct psref                    *psref_next;
+       struct psref                    **psref_prevp;
        const struct psref_target       *psref_target;
        struct lwp                      *psref_lwp;
        struct cpu_info                 *psref_cpu;
Index: sys/kern/subr_psref.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_psref.c,v
retrieving revision 1.7
diff -p -u -r1.7 subr_psref.c
--- sys/kern/subr_psref.c       1 Jun 2017 02:45:13 -0000       1.7
+++ sys/kern/subr_psref.c       6 Oct 2017 03:08:58 -0000
@@ -78,8 +78,6 @@ __KERNEL_RCSID(0, "$NetBSD: subr_psref.c
 #include <sys/queue.h>
 #include <sys/xcall.h>
 
-LIST_HEAD(psref_head, psref);
-
 static bool    _psref_held(const struct psref_target *, struct psref_class *,
                    bool);
 
@@ -103,7 +101,7 @@ struct psref_class {
  *     Not exposed by the API.
  */
 struct psref_cpu {
-       struct psref_head       pcpu_head;
+       struct psref            *pcpu_first;
 };
 
 /*
@@ -135,7 +133,7 @@ psref_cpu_drained_p(void *p, void *cooki
        const struct psref_cpu *pcpu = p;
        bool *retp = cookie;
 
-       if (!LIST_EMPTY(&pcpu->pcpu_head))
+       if (pcpu->pcpu_first != NULL)
                *retp = false;
 }
 
@@ -194,7 +192,9 @@ psref_check_duplication(struct psref_cpu
        bool found = false;
        struct psref *_psref;
 
-       LIST_FOREACH(_psref, &pcpu->pcpu_head, psref_entry) {
+       for (_psref = pcpu->pcpu_first;
+            _psref != NULL;
+            _psref = _psref->psref_next) {
                if (_psref == psref &&
                    _psref->psref_target == target) {
                        found = true;
@@ -250,7 +250,11 @@ psref_acquire(struct psref *psref, const
 #endif
 
        /* Record our reference.  */
-       LIST_INSERT_HEAD(&pcpu->pcpu_head, psref, psref_entry);
+       if ((psref->psref_next = pcpu->pcpu_first) != NULL)
+               psref->psref_next->psref_prevp = &psref->psref_next;
+       psref->psref_prevp = NULL;      /* head of list */
+       pcpu->pcpu_first = psref;
+
        psref->psref_target = target;
        psref->psref_lwp = curlwp;
        psref->psref_cpu = curcpu();
@@ -273,6 +277,7 @@ void
 psref_release(struct psref *psref, const struct psref_target *target,
     struct psref_class *class)
 {
+       struct psref_cpu *pcpu;
        int s;
 
        KASSERTMSG((kpreempt_disabled() || cpu_softintr_p() ||
@@ -297,12 +302,31 @@ psref_release(struct psref *psref, const
 
        /*
         * Block interrupts and remove the psref from the current CPU's
-        * list.  No need to percpu_getref or get the head of the list,
-        * and the caller guarantees that we are bound to a CPU anyway
-        * (as does blocking interrupts).
+        * list.
         */
        s = splraiseipl(class->prc_iplcookie);
-       LIST_REMOVE(psref, psref_entry);
+       if (psref->psref_next != NULL)
+               psref->psref_next->psref_prevp = psref->psref_prevp;
+
+       /*
+        * If it's at the head of the list, we must modify the head
+        * explicitly.  We can't use a back-pointer into the head
+        * because percpu_alloc may move the per-CPU objects around in
+        * memory.
+        */
+       pcpu = percpu_getref(class->prc_percpu);
+       KASSERTMSG(psref->psref_prevp == NULL || *psref->psref_prevp == psref,
+           "psref %p prevp %p points at %p",
+           psref, psref->psref_prevp, *psref->psref_prevp);
+       KASSERTMSG(psref->psref_prevp != NULL || pcpu->pcpu_first == psref,
+           "psref %p marked as first but psref_cpu %p on %d first is %p",
+           psref, pcpu, cpu_index(curcpu()), pcpu->pcpu_first);
+       *(psref->psref_prevp ? psref->psref_prevp : &pcpu->pcpu_first) =
+           psref->psref_next;
+       percpu_putref(class->prc_percpu);
+
+       psref->psref_prevp = (void *)1; /* poison */
+       psref->psref_next = (void *)2;  /* poison */
        splx(s);
 
        /* If someone is waiting for users to drain, notify 'em.  */
@@ -353,7 +377,11 @@ psref_copy(struct psref *pto, const stru
        pcpu = percpu_getref(class->prc_percpu);
 
        /* Record the new reference.  */
-       LIST_INSERT_HEAD(&pcpu->pcpu_head, pto, psref_entry);
+       if ((pto->psref_next = pcpu->pcpu_first) != NULL)
+               pto->psref_next->psref_prevp = &pto->psref_next;
+       pto->psref_prevp = NULL;        /* head of list */
+       pcpu->pcpu_first = pto;
+
        pto->psref_target = pfrom->psref_target;
        pto->psref_lwp = curlwp;
        pto->psref_cpu = curcpu();
@@ -474,7 +502,9 @@ _psref_held(const struct psref_target *t
        pcpu = percpu_getref(class->prc_percpu);
 
        /* Search through all the references on this CPU.  */
-       LIST_FOREACH(psref, &pcpu->pcpu_head, psref_entry) {
+       for (psref = pcpu->pcpu_first;
+            psref != NULL;
+            psref = psref->psref_next) {
                /* Sanity-check the reference's CPU.  */
                KASSERTMSG((psref->psref_cpu == curcpu()),
                    "passive reference transferred from CPU %u to CPU %u",

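For what it's worth, the new KASSERTs in psref_release amount to this
linkage invariant, again rendered in the illustrative userland types
from the sketch above rather than the kernel's:

#include <assert.h>

/*
 * An element is at the head iff its prevp is NULL; otherwise its
 * prevp must point back at it.
 */
static void
check_linkage(const struct head *h, const struct node *n)
{

	if (n->prevp == NULL)
		assert(h->first == n);
	else
		assert(*n->prevp == n);
}
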