The diff below adds cleanup of idle lists in the per-CPU caches in pools. The caches on the CPUs move lists of items around instead of individual items. These lists are moved to the global pool struct and accumulate there. If you get a burst of work in the pool (e.g., you use a lot of mbufs for a short period) you'll end up with a lot of lists in the global pool struct that don't get used.
We GC idle pages in vanilla pools and return them to the system, so this extends that to the cache subsystem. It does this by timestamping the global list of lists when it has been empty, and if it hasn't been empty for a while it returns a list to the pages where the items can be GC'ed back to the page allocator. If the items in a pool are being allocated in bursts over relatively short periods — e.g., think replenishing rx rings on a nic in rate-limited interrupts — then the global lists will be emptied regularly. If we stop rxing packets, then we won't empty the lists and therefore won't timestamp them, making them available for GC. Care has to be taken when moving items on a cache list back to the pages so the puts aren't counted twice (once when the item is put on a free list on a cpu cache, and again when moving from the list back to the pages), so this splits pool_put up. pool_do_put is solely responsible for putting items back into pool pages. This is then called from pool_put (which still does the accounting for normal pools) and the list GC. A neat side effect of this is that the list GC can return multiple items to the pages while only taking the lock around the page structures once. I wrote this in Feb, and it's been running solidly for me ever since. I wanted to implement the systat bits before putting it in though. The last big pool cache chunk after this is growing the size of the cache lists based on contention. ok?
Index: sys/pool.h =================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.70 diff -u -p -r1.70 pool.h --- sys/pool.h 15 Jun 2017 02:52:30 -0000 1.70 +++ sys/pool.h 15 Jun 2017 11:28:31 -0000 @@ -185,11 +185,13 @@ struct pool { unsigned long pr_cache_magic[2]; struct mutex pr_cache_mtx; struct pool_cache_lists - pr_cache_lists; - u_int pr_cache_nlist; /* # of lists */ + pr_cache_lists; /* list of idle item lists */ + u_int pr_cache_nlist; /* # of idle lists */ u_int pr_cache_items; /* target list length */ u_int pr_cache_contention; + int pr_cache_tick; /* time idle list was empty */ int pr_cache_nout; + uint64_t pr_cache_ngc; /* # of times the gc released a list */ u_int pr_align; u_int pr_maxcolors; /* Cache coloring */ Index: kern/subr_pool.c =================================================================== RCS file: /cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.212 diff -u -p -r1.212 subr_pool.c --- kern/subr_pool.c 15 Jun 2017 03:50:50 -0000 1.212 +++ kern/subr_pool.c 15 Jun 2017 11:28:31 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_pool.c,v 1.212 2017/06/15 03:50:50 dlg Exp $ */ +/* $OpenBSD: subr_pool.c,v 1.211 2017/06/15 03:48:50 dlg Exp $ */ /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ /*- @@ -135,6 +135,7 @@ struct pool_cache { void *pool_cache_get(struct pool *); void pool_cache_put(struct pool *, void *); void pool_cache_destroy(struct pool *); +void pool_cache_gc(struct pool *); #endif void pool_cache_pool_info(struct pool *, struct kinfo_pool *); int pool_cache_info(struct pool *, void *, size_t *); @@ -156,6 +157,7 @@ void pool_p_free(struct pool *, struct void pool_update_curpage(struct pool *); void *pool_do_get(struct pool *, int, int *); +void pool_do_put(struct pool *, void *); int pool_chk_page(struct pool *, struct pool_page_header *, int); int pool_chk(struct pool *); void pool_get_done(void *, void *); @@ -711,7 +713,6 @@ pool_do_get(struct pool 
*pp, int flags, void pool_put(struct pool *pp, void *v) { - struct pool_item *pi = v; struct pool_page_header *ph, *freeph = NULL; #ifdef DIAGNOSTIC @@ -728,6 +729,37 @@ pool_put(struct pool *pp, void *v) mtx_enter(&pp->pr_mtx); + pool_do_put(pp, v); + + pp->pr_nout--; + pp->pr_nput++; + + /* is it time to free a page? */ + if (pp->pr_nidle > pp->pr_maxpages && + (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && + (ticks - ph->ph_tick) > (hz * pool_wait_free)) { + freeph = ph; + pool_p_remove(pp, freeph); + } + + mtx_leave(&pp->pr_mtx); + + if (freeph != NULL) + pool_p_free(pp, freeph); + + if (!TAILQ_EMPTY(&pp->pr_requests)) { + mtx_enter(&pp->pr_requests_mtx); + pool_runqueue(pp, PR_NOWAIT); + mtx_leave(&pp->pr_requests_mtx); + } +} + +void +pool_do_put(struct pool *pp, void *v) +{ + struct pool_item *pi = v; + struct pool_page_header *ph; + splassert(pp->pr_ipl); ph = pr_find_pagehead(pp, v); @@ -771,27 +803,6 @@ pool_put(struct pool *pp, void *v) TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); pool_update_curpage(pp); } - - pp->pr_nout--; - pp->pr_nput++; - - /* is it time to free a page? 
*/ - if (pp->pr_nidle > pp->pr_maxpages && - (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && - (ticks - ph->ph_tick) > (hz * pool_wait_free)) { - freeph = ph; - pool_p_remove(pp, freeph); - } - mtx_leave(&pp->pr_mtx); - - if (freeph != NULL) - pool_p_free(pp, freeph); - - if (!TAILQ_EMPTY(&pp->pr_requests)) { - mtx_enter(&pp->pr_requests_mtx); - pool_runqueue(pp, PR_NOWAIT); - mtx_leave(&pp->pr_requests_mtx); - } } /* @@ -1466,6 +1477,11 @@ pool_gc_pages(void *null) rw_enter_read(&pool_lock); s = splvm(); /* XXX go to splvm until all pools _setipl properly */ SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { +#ifdef MULTIPROCESSOR + if (pp->pr_cache != NULL) + pool_cache_gc(pp); +#endif + if (pp->pr_nidle <= pp->pr_minpages || /* guess */ !mtx_enter_try(&pp->pr_mtx)) /* try */ continue; @@ -1632,8 +1648,10 @@ pool_cache_init(struct pool *pp) arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic)); TAILQ_INIT(&pp->pr_cache_lists); pp->pr_cache_nlist = 0; + pp->pr_cache_tick = ticks; pp->pr_cache_items = 8; pp->pr_cache_contention = 0; + pp->pr_cache_ngc = 0; CPUMEM_FOREACH(pc, &i, cm) { pc->pc_actv = NULL; @@ -1649,6 +1667,8 @@ pool_cache_init(struct pool *pp) pc->pc_nout = 0; } + membar_producer(); + pp->pr_cache = cm; } @@ -1730,6 +1750,9 @@ pool_cache_list_free(struct pool *pp, st struct pool_cache_item *ci) { pool_list_enter(pp); + if (TAILQ_EMPTY(&pp->pr_cache_lists)) + pp->pr_cache_tick = ticks; + TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl); pp->pr_cache_nlist++; @@ -1864,11 +1887,13 @@ pool_cache_list_put(struct pool *pp, str rpl = TAILQ_NEXT(pl, ci_nextl); + mtx_enter(&pp->pr_mtx); do { next = pl->ci_next; - pool_put(pp, pl); + pool_do_put(pp, pl); pl = next; } while (pl != NULL); + mtx_leave(&pp->pr_mtx); return (rpl); } @@ -1881,8 +1906,10 @@ pool_cache_destroy(struct pool *pp) struct cpumem_iter i; struct cpumem *cm; + rw_enter_write(&pool_lock); /* serialise with the gc */ cm = pp->pr_cache; pp->pr_cache = NULL; /* make pool_put avoid the 
cache */ + rw_exit_write(&pool_lock); CPUMEM_FOREACH(pc, &i, cm) { pool_cache_list_put(pp, pc->pc_actv); @@ -1897,6 +1924,29 @@ pool_cache_destroy(struct pool *pp) } void +pool_cache_gc(struct pool *pp) +{ + if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) && + !TAILQ_EMPTY(&pp->pr_cache_lists) && + mtx_enter_try(&pp->pr_cache_mtx)) { + struct pool_cache_item *pl = NULL; + + pl = TAILQ_FIRST(&pp->pr_cache_lists); + if (pl != NULL) { + TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); + pp->pr_cache_tick = ticks; + pp->pr_cache_nlist--; + + pp->pr_cache_ngc++; + } + + mtx_leave(&pp->pr_cache_mtx); + + pool_cache_list_put(pp, pl); + } +} + +void pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) { struct pool_cache *pc; @@ -1943,7 +1993,7 @@ pool_cache_info(struct pool *pp, void *o memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */ mtx_enter(&pp->pr_cache_mtx); - kpc.pr_ngc = 0; /* notyet */ + kpc.pr_ngc = pp->pr_cache_ngc; kpc.pr_len = pp->pr_cache_items; kpc.pr_nlist = pp->pr_cache_nlist; kpc.pr_contention = pp->pr_cache_contention;