On Tue, Oct 25, 2016 at 10:35:45AM +1000, David Gwynne wrote:
> On Mon, Oct 24, 2016 at 04:24:13PM +1000, David Gwynne wrote:
> > i've posted this before as part of a much bigger diff, but smaller
> > is better.
> > 
> > it basically lets things ask for per-cpu item caches to be enabled
> > on pools. the most obvious use case for this is the mbuf pools.
> > 
> > the caches are modelled on what's described in the "Magazines and
> > Vmem: Extending the Slab Allocator to Many CPUs and Arbitrary
> > Resources" paper by Jeff Bonwick and Jonathan Adams. pools are
> > modelled on slabs, which bonwick described in a previous paper.
> > 
> > the main inspiration the paper provided was how many objects to
> > cache on each cpu, and how often to move sets of objects between
> > the cpu caches and a global list of objects. unlike the paper we
> > do not care about maintaining constructed objects on the free lists,
> > so we reuse the objects themselves to build the free list.
> > 
> > i'd like to get this in so we can tinker with it in tree. the things
> > i think we need to tinker with are what poisoning we can get away
> > with on the per-cpu caches, and what limits we can enforce at the
> > pool level.
> > 
> > i think poisoning will be relatively simple to add. the limits one
> > is more challenging because we don't want the pools to have to
> > coordinate between cpus on every get or put operation. my thought
> > there was to limit the number of pages that a pool can allocate
> > from its backend rather than limit the items the pool can provide.
> > limiting the pages could also be done at a lower level. eg, the
> > mbuf clusters could share a common backend that limits the pages
> > the pools can get, rather than have the cluster pools account for
> > pages separately.
> > 
> > anyway, either way i would like to get this in so we can work on
> > this stuff.
> > 
> > ok?
> 
> this adds per-cpu caches to the mbuf pools so people can actually
> try and see if the code works or not.
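to make the "reuse the objects themselves" trick concrete, here is a
minimal userland sketch of the idea. this is not the kernel code:
struct item_list, union obj, and the function names are invented for
illustration. a freed object's own memory holds the link and a running
item count, so the per-cpu free lists cost no memory beyond the cached
objects themselves.

#include <assert.h>
#include <stddef.h>

/*
 * item_list mirrors the shape of the diff's struct pool_list: the
 * first words of a free item hold the link and a running count.
 */
struct item_list {
	struct item_list	*il_next;	/* next free item */
	unsigned long		 il_nitems;	/* items from here down */
};

/* each item must be big enough to carry the node while it is free */
union obj {
	struct item_list	 il;		/* free: a list node */
	char			 data[64];	/* allocated: payload */
};

static void
list_push(struct item_list **headp, union obj *o)
{
	struct item_list *il = &o->il;	/* reuse the object's memory */

	il->il_next = *headp;
	il->il_nitems = (*headp == NULL) ? 1 : (*headp)->il_nitems + 1;
	*headp = il;
}

static union obj *
list_pop(struct item_list **headp)
{
	struct item_list *il = *headp;

	if (il == NULL)
		return (NULL);

	*headp = il->il_next;
	return ((union obj *)il);
}

int
main(void)
{
	static union obj objs[4];
	struct item_list *head = NULL;
	int i;

	for (i = 0; i < 4; i++)
		list_push(&head, &objs[i]);

	assert(head->il_nitems == 4);

	while (list_pop(&head) != NULL)
		;

	return (0);
}

keeping the count in the head item is what lets pool_cache_put() in the
diff decide in O(1) when the active list is full and should rotate to
pc_prev or back to the global list.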
this fixes a crash hrvoje and i found independently: avoid holding
a mutex when calling yield().

also some whitespace fixes.

Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.198
diff -u -p -r1.198 subr_pool.c
--- kern/subr_pool.c	15 Sep 2016 02:00:16 -0000	1.198
+++ kern/subr_pool.c	27 Oct 2016 03:51:10 -0000
@@ -42,6 +42,7 @@
 #include <sys/sysctl.h>
 #include <sys/task.h>
 #include <sys/timeout.h>
+#include <sys/percpu.h>
 
 #include <uvm/uvm_extern.h>
 
@@ -96,6 +97,33 @@ struct pool_item {
 };
 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
 
+#ifdef MULTIPROCESSOR
+struct pool_list {
+	struct pool_list	*pl_next;	/* next in list */
+	unsigned long		 pl_cookie;
+	struct pool_list	*pl_nextl;	/* next list */
+	unsigned long		 pl_nitems;	/* items in list */
+};
+
+struct pool_cache {
+	struct pool_list	*pc_actv;
+	unsigned long		 pc_nactv;	/* cache pc_actv nitems */
+	struct pool_list	*pc_prev;
+
+	uint64_t		 pc_gen;	/* generation number */
+	uint64_t		 pc_gets;
+	uint64_t		 pc_puts;
+	uint64_t		 pc_fails;
+
+	int			 pc_nout;
+};
+
+void	*pool_cache_get(struct pool *);
+void	 pool_cache_put(struct pool *, void *);
+void	 pool_cache_destroy(struct pool *);
+#endif
+void	 pool_cache_info(struct pool *, struct kinfo_pool *);
+
 #ifdef POOL_DEBUG
 int	pool_debug = 1;
 #else
@@ -355,6 +383,11 @@ pool_destroy(struct pool *pp)
 	struct pool_item_header *ph;
 	struct pool *prev, *iter;
 
+#ifdef MULTIPROCESSOR
+	if (pp->pr_cache != NULL)
+		pool_cache_destroy(pp);
+#endif
+
 #ifdef DIAGNOSTIC
 	if (pp->pr_nout != 0)
 		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
@@ -421,6 +454,14 @@ pool_get(struct pool *pp, int flags)
 	void *v = NULL;
 	int slowdown = 0;
 
+#ifdef MULTIPROCESSOR
+	if (pp->pr_cache != NULL) {
+		v = pool_cache_get(pp);
+		if (v != NULL)
+			goto good;
+	}
+#endif
+
 	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
 
 	mtx_enter(&pp->pr_mtx);
@@ -453,6 +494,9 @@ pool_get(struct pool *pp, int flags)
 		v = mem.v;
 	}
 
+#ifdef MULTIPROCESSOR
+good:
+#endif
 	if (ISSET(flags, PR_ZERO))
 		memset(v, 0, pp->pr_size);
 
@@ -631,6 +675,13 @@ pool_put(struct pool *pp, void *v)
 		panic("%s: NULL item", __func__);
 #endif
 
+#ifdef MULTIPROCESSOR
+	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
+		pool_cache_put(pp, v);
+		return;
+	}
+#endif
+
 	mtx_enter(&pp->pr_mtx);
 	splassert(pp->pr_ipl);
 
@@ -1333,6 +1384,8 @@ sysctl_dopool(int *name, u_int namelen, 
 		pi.pr_nidle = pp->pr_nidle;
 		mtx_leave(&pp->pr_mtx);
 
+		pool_cache_info(pp, &pi);
+
 		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
 		break;
 	}
@@ -1499,3 +1552,265 @@ pool_multi_free_ni(struct pool *pp, void
 	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
 	KERNEL_UNLOCK();
 }
+
+#ifdef MULTIPROCESSOR
+
+struct pool pool_caches; /* per cpu cache entries */
+
+void
+pool_cache_init(struct pool *pp)
+{
+	struct cpumem *cm;
+	struct pool_cache *pc;
+	struct cpumem_iter i;
+
+	if (pool_caches.pr_size == 0) {
+		pool_init(&pool_caches, sizeof(struct pool_cache), 64,
+		    IPL_NONE, PR_WAITOK, "plcache", NULL);
+	}
+
+	KASSERT(pp->pr_size >= sizeof(*pc));
+
+	cm = cpumem_get(&pool_caches);
+
+	mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
+	pp->pr_cache_list = NULL;
+	pp->pr_cache_nlist = 0;
+	pp->pr_cache_items = 8;
+	pp->pr_cache_contention = 0;
+
+	CPUMEM_FOREACH(pc, &i, cm) {
+		pc->pc_actv = NULL;
+		pc->pc_nactv = 0;
+		pc->pc_prev = NULL;
+
+		pc->pc_gets = 0;
+		pc->pc_puts = 0;
+		pc->pc_fails = 0;
+		pc->pc_nout = 0;
+	}
+
+	pp->pr_cache = cm;
+}
+
+static inline void
+pool_list_enter(struct pool *pp)
+{
+	if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
+		mtx_enter(&pp->pr_cache_mtx);
+		pp->pr_cache_contention++;
+	}
+}
+
+static inline void
+pool_list_leave(struct pool *pp)
+{
+	mtx_leave(&pp->pr_cache_mtx);
+}
+
+static inline struct pool_list *
+pool_list_alloc(struct pool *pp, struct pool_cache *pc)
+{
+	struct pool_list *pl;
+
+	pool_list_enter(pp);
+	pl = pp->pr_cache_list;
+	if (pl != NULL) {
+		pp->pr_cache_list = pl->pl_nextl;
+		pp->pr_cache_nlist--;
+	}
+
+	pp->pr_cache_nout += pc->pc_nout;
+	pc->pc_nout = 0;
+	pool_list_leave(pp);
+
+	return (pl);
+}
+
+static inline void
+pool_list_free(struct pool *pp, struct pool_cache *pc, struct pool_list *pl)
+{
+	pool_list_enter(pp);
+	pl->pl_nextl = pp->pr_cache_list;
+	pp->pr_cache_list = pl;
+	pp->pr_cache_nlist++;
+
+	pp->pr_cache_nout += pc->pc_nout;
+	pc->pc_nout = 0;
+	pool_list_leave(pp);
+}
+
+static inline struct pool_cache *
+pool_cache_enter(struct pool *pp, int *s)
+{
+	struct pool_cache *pc;
+
+	pc = cpumem_enter(pp->pr_cache);
+	*s = splraise(pp->pr_ipl);
+	pc->pc_gen++;
+
+	return (pc);
+}
+
+static inline void
+pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
+{
+	pc->pc_gen++;
+	splx(s);
+	cpumem_leave(pp->pr_cache, pc);
+}
+
+void *
+pool_cache_get(struct pool *pp)
+{
+	struct pool_cache *pc;
+	struct pool_list *pl;
+	int s;
+
+	pc = pool_cache_enter(pp, &s);
+
+	if (pc->pc_actv != NULL) {
+		pl = pc->pc_actv;
+	} else if (pc->pc_prev != NULL) {
+		pl = pc->pc_prev;
+		pc->pc_prev = NULL;
+	} else if ((pl = pool_list_alloc(pp, pc)) == NULL) {
+		pc->pc_fails++;
+		goto done;
+	}
+
+	pc->pc_actv = pl->pl_next;
+	pc->pc_nactv = pl->pl_nitems - 1;
+	pc->pc_gets++;
+	pc->pc_nout++;
+done:
+	pool_cache_leave(pp, pc, s);
+
+	return (pl);
+}
+
+void
+pool_cache_put(struct pool *pp, void *v)
+{
+	struct pool_cache *pc;
+	struct pool_list *pl = v;
+	unsigned long cache_items = pp->pr_cache_items;
+	unsigned long nitems;
+	int s;
+
+	pc = pool_cache_enter(pp, &s);
+
+	nitems = pc->pc_nactv;
+	if (nitems >= cache_items) {
+		if (pc->pc_prev != NULL)
+			pool_list_free(pp, pc, pc->pc_prev);
+
+		pc->pc_prev = pc->pc_actv;
+
+		pc->pc_actv = NULL;
+		pc->pc_nactv = 0;
+		nitems = 0;
+	}
+
+	pl->pl_next = pc->pc_actv;
+	pl->pl_nitems = ++nitems;
+
+	pc->pc_actv = pl;
+	pc->pc_nactv = nitems;
+
+	pc->pc_puts++;
+	pc->pc_nout--;
+
+	pool_cache_leave(pp, pc, s);
+}
+
+struct pool_list *
+pool_list_put(struct pool *pp, struct pool_list *pl)
+{
+	struct pool_list *rpl, *npl;
+
+	if (pl == NULL)
+		return (NULL);
+
+	rpl = pl->pl_nextl;
+
+	do {
+		npl = pl->pl_next;
+		pool_put(pp, pl);
+		pl = npl;
+	} while (pl != NULL);
+
+	return (rpl);
+}
+
+void
+pool_cache_destroy(struct pool *pp)
+{
+	struct pool_cache *pc;
+	struct pool_list *pl;
+	struct cpumem_iter i;
+	struct cpumem *cm;
+
+	cm = pp->pr_cache;
+	pp->pr_cache = NULL; /* make pool_put avoid the cache */
+
+	CPUMEM_FOREACH(pc, &i, cm) {
+		pool_list_put(pp, pc->pc_actv);
+		pool_list_put(pp, pc->pc_prev);
+	}
+
+	cpumem_put(&pool_caches, cm);
+
+	pl = pp->pr_cache_list;
+	while (pl != NULL)
+		pl = pool_list_put(pp, pl);
+}
+
+void
+pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
+{
+	struct pool_cache *pc;
+	struct cpumem_iter i;
+
+	if (pp->pr_cache == NULL)
+		return;
+
+	/* loop through the caches twice to collect stats */
+
+	/* once without the mtx so we can yield while reading nget/nput */
+	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
+		uint64_t gen, nget, nput;
+
+		do {
+			while ((gen = pc->pc_gen) & 1)
+				yield();
+
+			nget = pc->pc_gets;
+			nput = pc->pc_puts;
+		} while (gen != pc->pc_gen);
+
+		pi->pr_nget += nget;
+		pi->pr_nput += nput;
+	}
+
+	/* and once with the mtx so we can get consistent nout values */
+	mtx_enter(&pp->pr_cache_mtx);
+	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
+		pi->pr_nout += pc->pc_nout;
+
+	pi->pr_nout += pp->pr_cache_nout;
+	mtx_leave(&pp->pr_cache_mtx);
+}
+#else /* MULTIPROCESSOR */
+void
+pool_cache_init(struct pool *pp)
+{
+	/* nop */
+}
+
+void
+pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
+{
+	/* nop */
+}
+#endif /* MULTIPROCESSOR */

Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.237
diff -u -p -r1.237 uipc_mbuf.c
--- kern/uipc_mbuf.c	27 Oct 2016 03:29:55 -0000	1.237
+++ kern/uipc_mbuf.c	27 Oct 2016 03:51:10 -0000
@@ -186,7 +186,15 @@ mbinit(void)
 void
 mbcpuinit()
 {
+	int i;
+
 	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF);
+
+	pool_cache_init(&mbpool);
+	pool_cache_init(&mtagpool);
+
+	for (i = 0; i < nitems(mclsizes); i++)
+		pool_cache_init(&mclpools[i]);
 }
 
 void

Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.63
diff -u -p -r1.63 pool.h
--- sys/pool.h	15 Sep 2016 02:00:16 -0000	1.63
+++ sys/pool.h	27 Oct 2016 03:51:10 -0000
@@ -84,6 +84,9 @@ struct pool_allocator {
 
 TAILQ_HEAD(pool_pagelist, pool_item_header);
 
+struct pool_list;
+struct cpumem;
+
 struct pool {
 	struct mutex	pr_mtx;
 	SIMPLEQ_ENTRY(pool)
@@ -124,6 +127,15 @@ struct pool {
 	RBT_HEAD(phtree, pool_item_header)
 			pr_phtree;
 
+	struct cpumem *	pr_cache;
+	struct mutex	pr_cache_mtx;
+	struct pool_list *
+			pr_cache_list;
+	u_int		pr_cache_nlist;
+	u_int		pr_cache_items;
+	u_int		pr_cache_contention;
+	int		pr_cache_nout;
+
 	u_int		pr_align;
 	u_int		pr_maxcolors;	/* Cache coloring */
 	int		pr_phoffset;	/* Offset in page of page header */
@@ -175,6 +187,7 @@ struct pool_request {
 
 void		pool_init(struct pool *, size_t, u_int, int, int,
 		    const char *, struct pool_allocator *);
+void		pool_cache_init(struct pool *);
 void		pool_destroy(struct pool *);
 void		pool_setlowat(struct pool *, int);
 void		pool_sethiwat(struct pool *, int);
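a note on the stats read in pool_cache_info() above: pool_cache_enter()
and pool_cache_leave() increment pc_gen around every per-cpu operation,
so the counter is odd exactly while a cpu is mutating its cache. the
sketch below shows that generation-counter pattern in isolation. the
names are invented, and it omits the memory barriers and the yield() a
real MP kernel needs; the need to yield() is exactly why the crash fix
keeps pr_cache_mtx out of this first pass.

#include <stdint.h>

struct cache_stats {
	uint64_t	gen;	/* odd while an update is in flight */
	uint64_t	gets;
	uint64_t	puts;
};

/* writer: bracket the update the way pool_cache_enter/leave do */
static void
stats_update(struct cache_stats *c)
{
	c->gen++;	/* odd: readers back off */
	c->gets++;
	c->puts++;
	c->gen++;	/* even: a stable snapshot exists again */
}

/* reader: retry until a whole snapshot is taken under one even gen */
static void
stats_read(struct cache_stats *c, uint64_t *gets, uint64_t *puts)
{
	uint64_t gen;

	do {
		while ((gen = c->gen) & 1)
			;	/* the kernel yield()s here, not spins */

		*gets = c->gets;
		*puts = c->puts;
	} while (gen != c->gen);
}

int
main(void)
{
	struct cache_stats c = { 0, 0, 0 };
	uint64_t gets, puts;

	stats_update(&c);
	stats_read(&c, &gets, &puts);

	return (gets == 1 && puts == 1 ? 0 : 1);
}

the second pass in pool_cache_info() then takes pr_cache_mtx only for
summing the pc_nout values, where a consistent view matters and nothing
can yield().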