On Tue, Oct 25, 2016 at 10:35:45AM +1000, David Gwynne wrote:
> On Mon, Oct 24, 2016 at 04:24:13PM +1000, David Gwynne wrote:
> > i've posted this before as part of a much bigger diff, but smaller
> > is better.
> > 
> > it basically lets things ask for per-cpu item caches to be enabled
> > on pools. the most obvious use case for this is the mbuf pools.
> > 
> > the caches are modelled on what's described in the "Magazines and
> > Vmem: Extending the Slab Allocator to Many CPUs and Arbitrary
> > Resources" paper by Jeff Bonwick and Jonathan Adams. pools are
> > modelled on slabs, which bonwick described in a previous paper.
> > 
> > the main inspiration the paper provided was how many objects to
> > cache on each cpu, and how often to move sets of objects between
> > the cpu caches and a global list of objects. unlike the paper we
> > do not care about maintaining constructed objects on the free lists,
> > so we reuse the objects themselves to build the free list.
> > 
> > i'd like to get this in so we can tinker with it in tree. the things
> > i think we need to tinker with are what poisoning we can get away
> > with on the per-cpu caches, and what limits we can enforce at the
> > pool level.
> > 
> > i think poisoning will be relatively simple to add. the limits one
> > is more challenging because we don't want the pools to have to
> > coordinate between cpus on every get or put operation. my thought
> > there was to limit the number of pages that a pool can allocate
> > from its backend rather than limit the items the pool can provide.
> > limiting the pages could also be done at a lower level. eg, the
> > mbuf clusters could share a common backend that limits the pages
> > the pools can get, rather than have the cluster pools account for
> > pages separately.
> > 
> > anyway, either way i would like to get this in so we can work on
> > this stuff.
> > 
> > ok?
> 
> this adds per-cpu caches to the mbuf pools so people can actually
> try and see if the code works or not.
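to make the "reuse the objects themselves" trick concrete, here is a
minimal userland sketch of the idea. this is not the kernel code:
struct item_list, union obj, and the function names are invented for
illustration. a freed object's own memory holds the link and a running
item count, so the per-cpu free lists cost no memory beyond the cached
objects themselves.

#include <assert.h>
#include <stddef.h>

/*
 * item_list mirrors the shape of the diff's struct pool_list: the
 * first words of a free item hold the link and a running count.
 */
struct item_list {
	struct item_list	*il_next;	/* next free item */
	unsigned long		 il_nitems;	/* items from here down */
};

/* each item must be big enough to carry the node while it is free */
union obj {
	struct item_list	 il;		/* free: a list node */
	char			 data[64];	/* allocated: payload */
};

static void
list_push(struct item_list **headp, union obj *o)
{
	struct item_list *il = &o->il;	/* reuse the object's memory */

	il->il_next = *headp;
	il->il_nitems = (*headp == NULL) ? 1 : (*headp)->il_nitems + 1;
	*headp = il;
}

static union obj *
list_pop(struct item_list **headp)
{
	struct item_list *il = *headp;

	if (il == NULL)
		return (NULL);

	*headp = il->il_next;
	return ((union obj *)il);
}

int
main(void)
{
	static union obj objs[4];
	struct item_list *head = NULL;
	int i;

	for (i = 0; i < 4; i++)
		list_push(&head, &objs[i]);

	assert(head->il_nitems == 4);

	while (list_pop(&head) != NULL)
		;

	return (0);
}

keeping the count in the head item is what lets pool_cache_put() in the
diff decide in O(1) when the active list is full and should rotate to
pc_prev or back to the global list.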
this fixes a crash hrvoje and i found independently: avoid holding
a mutex when calling yield().

also some whitespace fixes.

Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.198
diff -u -p -r1.198 subr_pool.c
--- kern/subr_pool.c	15 Sep 2016 02:00:16 -0000	1.198
+++ kern/subr_pool.c	27 Oct 2016 03:51:10 -0000
@@ -42,6 +42,7 @@
 #include <sys/sysctl.h>
 #include <sys/task.h>
 #include <sys/timeout.h>
+#include <sys/percpu.h>
 
 #include <uvm/uvm_extern.h>
 
@@ -96,6 +97,33 @@ struct pool_item {
 };
 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
 
+#ifdef MULTIPROCESSOR
+struct pool_list {
+	struct pool_list	*pl_next;	/* next in list */
+	unsigned long		 pl_cookie;
+	struct pool_list	*pl_nextl;	/* next list */
+	unsigned long		 pl_nitems;	/* items in list */
+};
+
+struct pool_cache {
+	struct pool_list	*pc_actv;
+	unsigned long		 pc_nactv;	/* cache pc_actv nitems */
+	struct pool_list	*pc_prev;
+
+	uint64_t		 pc_gen;	/* generation number */
+	uint64_t		 pc_gets;
+	uint64_t		 pc_puts;
+	uint64_t		 pc_fails;
+
+	int			 pc_nout;
+};
+
+void	*pool_cache_get(struct pool *);
+void	 pool_cache_put(struct pool *, void *);
+void	 pool_cache_destroy(struct pool *);
+#endif
+void	 pool_cache_info(struct pool *, struct kinfo_pool *);
+
 #ifdef POOL_DEBUG
 int	pool_debug = 1;
 #else
@@ -355,6 +383,11 @@ pool_destroy(struct pool *pp)
 	struct pool_item_header *ph;
 	struct pool *prev, *iter;
 
+#ifdef MULTIPROCESSOR
+	if (pp->pr_cache != NULL)
+		pool_cache_destroy(pp);
+#endif
+
 #ifdef DIAGNOSTIC
 	if (pp->pr_nout != 0)
 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
@@ -421,6 +454,14 @@ pool_get(struct pool *pp, int flags)
 	void *v = NULL;
 	int slowdown = 0;
 
+#ifdef MULTIPROCESSOR
+	if (pp->pr_cache != NULL) {
+		v = pool_cache_get(pp);
+		if (v != NULL)
+			goto good;
+	}
+#endif
+
 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
 
 	mtx_enter(&pp->pr_mtx);
@@ -453,6 +494,9 @@ pool_get(struct pool *pp, int flags)
 		v = mem.v;
 	}
 
+#ifdef MULTIPROCESSOR
+good:
+#endif
 	if (ISSET(flags, PR_ZERO))
 		memset(v, 0, pp->pr_size);
 
@@ -631,6 +675,13 @@ pool_put(struct pool *pp, void *v)
 		panic("%s: NULL item", __func__);
 #endif
 
+#ifdef MULTIPROCESSOR
+	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
+		pool_cache_put(pp, v);
+		return;
+	}
+#endif
+
 	mtx_enter(&pp->pr_mtx);
 	splassert(pp->pr_ipl);
 
@@ -1333,6 +1384,8 @@ sysctl_dopool(int *name, u_int namelen, 
 		pi.pr_nidle = pp->pr_nidle;
 		mtx_leave(&pp->pr_mtx);
 
+		pool_cache_info(pp, &pi);
+
 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
 		break;
 	}
@@ -1499,3 +1552,265 @@ pool_multi_free_ni(struct pool *pp, void
 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
 	KERNEL_UNLOCK();
 }
+
+#ifdef MULTIPROCESSOR
+
+struct pool pool_caches; /* per cpu cache entries */
+
+void
+pool_cache_init(struct pool *pp)
+{
+	struct cpumem *cm;
+	struct pool_cache *pc;
+	struct cpumem_iter i;
+
+	if (pool_caches.pr_size == 0) {
+		pool_init(&pool_caches, sizeof(struct pool_cache), 64,
+		    IPL_NONE, PR_WAITOK, "plcache", NULL);
+	}
+
+	KASSERT(pp->pr_size >= sizeof(*pc));
+
+	cm = cpumem_get(&pool_caches);
+
+	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
+	pp->pr_cache_list = NULL;
+	pp->pr_cache_nlist = 0;
+	pp->pr_cache_items = 8;
+	pp->pr_cache_contention = 0;
+
+	CPUMEM_FOREACH(pc, &i, cm) {
+		pc->pc_actv = NULL;
+		pc->pc_nactv = 0;
+		pc->pc_prev = NULL;
+
+		pc->pc_gets = 0;
+		pc->pc_puts = 0;
+		pc->pc_fails = 0;
+		pc->pc_nout = 0;
+	}
+
+	pp->pr_cache = cm;
+}
+
+static inline void
+pool_list_enter(struct pool *pp)
+{
+	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
+		mtx_enter(&pp->pr_cache_mtx);
+		pp->pr_cache_contention++;
+	}
+}
+
+static inline void
+pool_list_leave(struct pool *pp)
+{
+	mtx_leave(&pp->pr_cache_mtx);
+}
+
+static inline struct pool_list *
+pool_list_alloc(struct pool *pp, struct pool_cache *pc)
+{
+	struct pool_list *pl;
+
+	pool_list_enter(pp);
+	pl = pp->pr_cache_list;
+	if (pl != NULL) {
+		pp->pr_cache_list = pl->pl_nextl;
+		pp->pr_cache_nlist--;
+	}
+
+	pp->pr_cache_nout += pc->pc_nout;
+	pc->pc_nout = 0;
+	pool_list_leave(pp);
+
+	return (pl);
+}
+
+static inline void
+pool_list_free(struct pool *pp, struct pool_cache *pc, struct pool_list *pl)
+{
+	pool_list_enter(pp);
+	pl->pl_nextl = pp->pr_cache_list;
+	pp->pr_cache_list = pl;
+	pp->pr_cache_nlist++;
+
+	pp->pr_cache_nout += pc->pc_nout;
+	pc->pc_nout = 0;
+	pool_list_leave(pp);
+}
+
+static inline struct pool_cache *
+pool_cache_enter(struct pool *pp, int *s)
+{
+	struct pool_cache *pc;
+
+	pc = cpumem_enter(pp->pr_cache);
+	*s = splraise(pp->pr_ipl);
+	pc->pc_gen++;
+
+	return (pc);
+}
+
+static inline void
+pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
+{
+	pc->pc_gen++;
+	splx(s);
+	cpumem_leave(pp->pr_cache, pc);
+}
+
+void *
+pool_cache_get(struct pool *pp)
+{
+	struct pool_cache *pc;
+	struct pool_list *pl;
+	int s;
+
+	pc = pool_cache_enter(pp, &s);
+
+	if (pc->pc_actv != NULL) {
+		pl = pc->pc_actv;
+	} else if (pc->pc_prev != NULL) {
+		pl = pc->pc_prev;
+		pc->pc_prev = NULL;
+	} else if ((pl = pool_list_alloc(pp, pc)) == NULL) {
+		pc->pc_fails++;
+		goto done;
+	}
+
+	pc->pc_actv = pl->pl_next;
+	pc->pc_nactv = pl->pl_nitems - 1;
+	pc->pc_gets++;
+	pc->pc_nout++;
+done:
+	pool_cache_leave(pp, pc, s);
+
+	return (pl);
+}
+
+void
+pool_cache_put(struct pool *pp, void *v)
+{
+	struct pool_cache *pc;
+	struct pool_list *pl = v;
+	unsigned long cache_items = pp->pr_cache_items;
+	unsigned long nitems;
+	int s;
+
+	pc = pool_cache_enter(pp, &s);
+
+	nitems = pc->pc_nactv;
+	if (nitems >= cache_items) {
+		if (pc->pc_prev != NULL)
+			pool_list_free(pp, pc, pc->pc_prev);
+
+		pc->pc_prev = pc->pc_actv;
+
+		pc->pc_actv = NULL;
+		pc->pc_nactv = 0;
+		nitems = 0;
+	}
+
+	pl->pl_next = pc->pc_actv;
+	pl->pl_nitems = ++nitems;
+
+	pc->pc_actv = pl;
+	pc->pc_nactv = nitems;
+
+	pc->pc_puts++;
+	pc->pc_nout--;
+
+	pool_cache_leave(pp, pc, s);
+}
+
+struct pool_list *
+pool_list_put(struct pool *pp, struct pool_list *pl)
+{
+	struct pool_list *rpl, *npl;
+
+	if (pl == NULL)
+		return (NULL);
+
+	rpl = pl->pl_nextl;
+
+	do {
+		npl = pl->pl_next;
+		pool_put(pp, pl);
+		pl = npl;
+	} while (pl != NULL);
+
+	return (rpl);
+}
+
+void
+pool_cache_destroy(struct pool *pp)
+{
+	struct pool_cache *pc;
+	struct pool_list *pl;
+	struct cpumem_iter i;
+	struct cpumem *cm;
+
+	cm = pp->pr_cache;
+	pp->pr_cache = NULL; /* make pool_put avoid the cache */
+
+	CPUMEM_FOREACH(pc, &i, cm) {
+		pool_list_put(pp, pc->pc_actv);
+		pool_list_put(pp, pc->pc_prev);
+	}
+
+	cpumem_put(&pool_caches, cm);
+
+	pl = pp->pr_cache_list;
+	while (pl != NULL)
+		pl = pool_list_put(pp, pl);
+}
+
+void
+pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
+{
+	struct pool_cache *pc;
+	struct cpumem_iter i;
+
+	if (pp->pr_cache == NULL)
+		return;
+
+	/* loop through the caches twice to collect stats */
+
+	/* once without the mtx so we can yield while reading nget/nput */
+	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
+		uint64_t gen, nget, nput;
+
+		do {
+			while ((gen = pc->pc_gen) & 1)
+				yield();
+
+			nget = pc->pc_gets;
+			nput = pc->pc_puts;
+		} while (gen != pc->pc_gen);
+
+		pi->pr_nget += nget;
+		pi->pr_nput += nput;
+	}
+
+	/* and once with the mtx so we can get consistent nout values */
+	mtx_enter(&pp->pr_cache_mtx);
+	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
+		pi->pr_nout += pc->pc_nout;
+
+	pi->pr_nout += pp->pr_cache_nout;
+	mtx_leave(&pp->pr_cache_mtx);
+}
+#else /* MULTIPROCESSOR */
+void
+pool_cache_init(struct pool *pp)
+{
+	/* nop */
+}
+
+void
+pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
+{
+	/* nop */
+}
+#endif /* MULTIPROCESSOR */

Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.237
diff -u -p -r1.237 uipc_mbuf.c
--- kern/uipc_mbuf.c	27 Oct 2016 03:29:55 -0000	1.237
+++ kern/uipc_mbuf.c	27 Oct 2016 03:51:10 -0000
@@ -186,7 +186,15 @@ mbinit(void)
 void
 mbcpuinit()
 {
+	int i;
+
 	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF);
+
+	pool_cache_init(&mbpool);
+	pool_cache_init(&mtagpool);
+
+	for (i = 0; i < nitems(mclsizes); i++)
+		pool_cache_init(&mclpools[i]);
 }
 
 void

Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.63
diff -u -p -r1.63 pool.h
--- sys/pool.h	15 Sep 2016 02:00:16 -0000	1.63
+++ sys/pool.h	27 Oct 2016 03:51:10 -0000
@@ -84,6 +84,9 @@ struct pool_allocator {
 
 TAILQ_HEAD(pool_pagelist, pool_item_header);
 
+struct pool_list;
+struct cpumem;
+
 struct pool {
 	struct mutex	pr_mtx;
 	SIMPLEQ_ENTRY(pool)
@@ -124,6 +127,15 @@ struct pool {
 	RBT_HEAD(phtree, pool_item_header)
 			pr_phtree;
 
+	struct cpumem *	pr_cache;
+	struct mutex	pr_cache_mtx;
+	struct pool_list *
+			pr_cache_list;
+	u_int		pr_cache_nlist;
+	u_int		pr_cache_items;
+	u_int		pr_cache_contention;
+	int		pr_cache_nout;
+
 	u_int		pr_align;
 	u_int		pr_maxcolors;	/* Cache coloring */
 	int		pr_phoffset;	/* Offset in page of page header */
@@ -175,6 +187,7 @@ struct pool_request {
 
 void		pool_init(struct pool *, size_t, u_int, int, int,
 		    const char *, struct pool_allocator *);
+void		pool_cache_init(struct pool *);
 void		pool_destroy(struct pool *);
 void		pool_setlowat(struct pool *, int);
 void		pool_sethiwat(struct pool *, int);
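a note on the stats read in pool_cache_info() above: pool_cache_enter()
and pool_cache_leave() increment pc_gen around every per-cpu operation,
so the counter is odd exactly while a cpu is mutating its cache. the
sketch below shows that generation-counter pattern in isolation. the
names are invented, and it omits the memory barriers and the yield() a
real MP kernel needs; the need to yield() is exactly why the crash fix
keeps pr_cache_mtx out of this first pass.

#include <stdint.h>

struct cache_stats {
	uint64_t	gen;	/* odd while an update is in flight */
	uint64_t	gets;
	uint64_t	puts;
};

/* writer: bracket the update the way pool_cache_enter/leave do */
static void
stats_update(struct cache_stats *c)
{
	c->gen++;	/* odd: readers back off */
	c->gets++;
	c->puts++;
	c->gen++;	/* even: a stable snapshot exists again */
}

/* reader: retry until a whole snapshot is taken under one even gen */
static void
stats_read(struct cache_stats *c, uint64_t *gets, uint64_t *puts)
{
	uint64_t gen;

	do {
		while ((gen = c->gen) & 1)
			;	/* the kernel yield()s here, not spins */

		*gets = c->gets;
		*puts = c->puts;
	} while (gen != c->gen);
}

int
main(void)
{
	struct cache_stats c = { 0, 0, 0 };
	uint64_t gets, puts;

	stats_update(&c);
	stats_read(&c, &gets, &puts);

	return (gets == 1 && puts == 1 ? 0 : 1);
}

the second pass in pool_cache_info() then takes pr_cache_mtx only for
summing the pc_nout values, where a consistent view matters and nothing
can yield().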