Hi, this splits the lookup table into two parts, one for smaller allocations and one for larger ones. This has the following advantages:
- smaller lookup tables (less cache line pollution)
- makes large kmem caches possible, currently up to min(16384, 4*PAGE_SIZE)
- smaller caches allocate from larger pool pages if that reduces the wastage

Two short illustrative sketches follow below (one showing how a size maps onto the two tables, one showing the page-size/wastage heuristic); they are not part of the patch.

any objections?

kind regards,
lars
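For reference, a rough userland sketch of how an allocation size ends up in the small table, the big table, or falls through to uvm_km_kmem_alloc(). The KMEM_* constants mirror the ones introduced in the diff below; everything else (the test sizes, the output) is made up for illustration, and the redzone/size-header bookkeeping is ignored:

/*
 * Illustration only, not part of the patch: maps a request size onto
 * the small table (8..1024 bytes, 8-byte steps) or the big table
 * (up to 16384 bytes, 1024-byte steps), mimicking kmem_intr_alloc().
 */
#include <stdio.h>

#define KMEM_SHIFT		3	/* small table quantum: 8 bytes */
#define KMEM_MAXSIZE		1024
#define KMEM_BIG_SHIFT		10	/* big table quantum: 1024 bytes */
#define KMEM_BIG_MAXSIZE	16384

int
main(void)
{
	size_t sizes[] = { 8, 100, 1024, 1025, 3000, 9000, 20000 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		size_t allocsz = sizes[i];	/* redzone/size header ignored */
		size_t index = (allocsz - 1) >> KMEM_SHIFT;
		size_t bigidx = (allocsz - 1) >> KMEM_BIG_SHIFT;

		/* the limits below equal kmem_cache_maxidx and
		   kmem_cache_big_maxidx as computed by the patch */
		if (index < (KMEM_MAXSIZE >> KMEM_SHIFT))
			printf("%zu -> small table, index %zu\n", allocsz, index);
		else if (bigidx < (KMEM_BIG_MAXSIZE >> KMEM_BIG_SHIFT))
			printf("%zu -> big table, index %zu\n", allocsz, bigidx);
		else
			printf("%zu -> direct uvm_km_kmem_alloc()\n", allocsz);
	}
	return 0;
}

With these numbers a 3000-byte request is served from the kmem-3072 cache (big table index 2) instead of falling through to a page allocation, and anything above 16384 still goes straight to uvm_km_kmem_alloc().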
Index: subr_kmem.c
===================================================================
RCS file: /cvsroot/src/sys/kern/subr_kmem.c,v
retrieving revision 1.42
diff -u -p -r1.42 subr_kmem.c
--- subr_kmem.c	5 Feb 2012 03:40:08 -0000	1.42
+++ subr_kmem.c	1 Mar 2012 16:36:50 -0000
@@ -77,10 +77,18 @@ __KERNEL_RCSID(0, "$NetBSD: subr_kmem.c,
 
 #include <lib/libkern/libkern.h>
 
-static const struct kmem_cache_info {
+#define KMEM_POOL_ALLOCATORS 4
+struct pool_allocator kmem_pool_allocators[KMEM_POOL_ALLOCATORS];
+
+void *kmem_page_alloc(struct pool *, int);
+void kmem_page_free(struct pool *, void *);
+
+struct kmem_cache_info {
 	size_t		kc_size;
 	const char *	kc_name;
-} kmem_cache_sizes[] = {
+};
+
+static const struct kmem_cache_info kmem_cache_sizes[] = {
 	{  8, "kmem-8" },
 	{ 16, "kmem-16" },
 	{ 24, "kmem-24" },
@@ -103,24 +111,41 @@ static const struct kmem_cache_info {
 	{ 512, "kmem-512" },
 	{ 768, "kmem-768" },
 	{ 1024, "kmem-1024" },
+	{ 0, NULL }
+};
+
+static const struct kmem_cache_info kmem_cache_big_sizes[] = {
 	{ 2048, "kmem-2048" },
+	{ 3072, "kmem-3072" },
 	{ 4096, "kmem-4096" },
+	{ 6144, "kmem-6144" },
+	{ 8192, "kmem-8192" },
+	{ 12288, "kmem-12288" },
+	{ 16384, "kmem-16384" },
 	{ 0, NULL }
 };
 
 /*
  * KMEM_ALIGN is the smallest guaranteed alignment and also the
- * smallest allocateable quantum. Every cache size is a multiply
+ * smallest allocateable quantum. Every cache size is a multiple
  * of CACHE_LINE_SIZE and gets CACHE_LINE_SIZE alignment.
  */
 #define	KMEM_ALIGN	8
 #define	KMEM_SHIFT	3
-#define	KMEM_MAXSIZE	4096
+#define	KMEM_MAXSIZE	1024
 #define	KMEM_CACHE_COUNT	(KMEM_MAXSIZE >> KMEM_SHIFT)
 
 static pool_cache_t kmem_cache[KMEM_CACHE_COUNT] __cacheline_aligned;
 static size_t kmem_cache_maxidx __read_mostly;
 
+#define	KMEM_BIG_ALIGN	1024
+#define	KMEM_BIG_SHIFT	10
+#define	KMEM_BIG_MAXSIZE	16384
+#define	KMEM_CACHE_BIG_COUNT	(KMEM_BIG_MAXSIZE >> KMEM_BIG_SHIFT)
+
+static pool_cache_t kmem_cache_big[KMEM_CACHE_BIG_COUNT] __cacheline_aligned;
+static size_t kmem_cache_big_maxidx __read_mostly;
+
 #if defined(DEBUG)
 int kmem_guard_depth = 0;
 size_t kmem_guard_size;
@@ -163,7 +188,7 @@ CTASSERT(KM_NOSLEEP == PR_NOWAIT);
 void *
 kmem_intr_alloc(size_t size, km_flag_t kmflags)
 {
-	size_t allocsz, index;
+	size_t allocsz, index, bigidx;
 	pool_cache_t pc;
 	uint8_t *p;
 
@@ -177,8 +202,13 @@ kmem_intr_alloc(size_t size, km_flag_t k
 #endif
 	allocsz = kmem_roundup_size(size) + REDZONE_SIZE + SIZE_SIZE;
 	index = (allocsz - 1) >> KMEM_SHIFT;
+	bigidx = (allocsz - 1) >> KMEM_BIG_SHIFT;
 
-	if (index >= kmem_cache_maxidx) {
+	if (index < kmem_cache_maxidx) {
+		pc = kmem_cache[index];
+	} else if (bigidx < kmem_cache_big_maxidx) {
+		pc = kmem_cache_big[bigidx];
+	} else {
 		int ret = uvm_km_kmem_alloc(kmem_va_arena,
 		    (vsize_t)round_page(allocsz),
 		    ((kmflags & KM_SLEEP) ? VM_SLEEP : VM_NOSLEEP)
@@ -186,7 +216,6 @@ kmem_intr_alloc(size_t size, km_flag_t k
 		return ret ? NULL : p;
 	}
 
-	pc = kmem_cache[index];
 	p = pool_cache_get(pc, kmflags);
 	if (__predict_true(p != NULL)) {
@@ -212,7 +241,7 @@ kmem_intr_zalloc(size_t size, km_flag_t
 void
 kmem_intr_free(void *p, size_t size)
 {
-	size_t allocsz, index;
+	size_t allocsz, index, bigidx;
 	pool_cache_t pc;
 
 	KASSERT(p != NULL);
@@ -226,8 +255,13 @@ kmem_intr_free(void *p, size_t size)
 #endif
 	allocsz = kmem_roundup_size(size) + REDZONE_SIZE + SIZE_SIZE;
 	index = (allocsz - 1) >> KMEM_SHIFT;
+	bigidx = (allocsz - 1) >> KMEM_BIG_SHIFT;
 
-	if (index >= kmem_cache_maxidx) {
+	if (index < kmem_cache_maxidx) {
+		pc = kmem_cache[index];
+	} else if (bigidx < kmem_cache_big_maxidx) {
+		pc = kmem_cache_big[bigidx];
+	} else {
 		uvm_km_kmem_free(kmem_va_arena, (vaddr_t)p,
 		    round_page(allocsz));
 		return;
@@ -239,7 +273,6 @@ kmem_intr_free(void *p, size_t size)
 	kmem_poison_check((uint8_t *)p + size,
 	    allocsz - size - SIZE_SIZE);
 	kmem_poison_fill(p, allocsz);
 
-	pc = kmem_cache[index];
 	pool_cache_put(pc, p);
 }
 
@@ -287,17 +320,20 @@ kmem_free(void *p, size_t size)
 	kmem_intr_free(p, size);
 }
 
-static void
+static size_t
 kmem_create_caches(const struct kmem_cache_info *array,
-    pool_cache_t alloc_table[], size_t maxsize)
+    pool_cache_t alloc_table[], size_t maxsize, int shift)
 {
-	size_t table_unit = (1 << KMEM_SHIFT);
+	size_t maxidx = 0;
+	size_t table_unit = (1 << shift);
 	size_t size = table_unit;
 	int i;
 
 	for (i = 0; array[i].kc_size != 0 ; i++) {
 		const char *name = array[i].kc_name;
 		size_t cache_size = array[i].kc_size;
+		struct pool_allocator *pa;
+		int wastage;
 		int flags = PR_NOALIGN;
 		pool_cache_t pc;
 		size_t align;
@@ -316,35 +352,66 @@ kmem_create_caches(const struct kmem_cac
 		if (cache_size > maxsize) {
 			break;
 		}
-		if ((cache_size >> KMEM_SHIFT) > kmem_cache_maxidx) {
-			kmem_cache_maxidx = cache_size >> KMEM_SHIFT;
+		if ((cache_size >> shift) > maxidx) {
+			maxidx = cache_size >> shift;
+		}
+
+		/* determine the most efficient pool_allocator */
+		pa = &kmem_pool_allocators[0];
+		wastage = pa->pa_pagesz -
+		    ((pa->pa_pagesz / cache_size) * cache_size);
+
+		for (int pai = 1; pai < KMEM_POOL_ALLOCATORS; pai++) {
+			struct pool_allocator *npa =
+			    &kmem_pool_allocators[pai];
+			int nwastage = npa->pa_pagesz -
+			    ((npa->pa_pagesz / cache_size) * cache_size);
+
+			if (nwastage + 128 < wastage) {
+				pa = npa;
+				wastage = nwastage;
+			}
+		}
+
+		if ((cache_size >> shift) > maxidx) {
+			maxidx = cache_size >> shift;
 		}
 
 #if defined(KMEM_POISON)
 		pc = pool_cache_init(cache_size, align, 0, flags,
-		    name, &pool_allocator_kmem, IPL_VM, kmem_poison_ctor,
+		    name, pa, IPL_VM, kmem_poison_ctor,
 		    NULL, (void *)cache_size);
 #else /* defined(KMEM_POISON) */
 		pc = pool_cache_init(cache_size, align, 0, flags,
-		    name, &pool_allocator_kmem, IPL_VM, NULL, NULL, NULL);
+		    name, pa, IPL_VM, NULL, NULL, NULL);
 #endif /* defined(KMEM_POISON) */
 
 		while (size <= cache_size) {
-			alloc_table[(size - 1) >> KMEM_SHIFT] = pc;
+			alloc_table[(size - 1) >> shift] = pc;
			size += table_unit;
 		}
 	}
+	return maxidx;
 }
 
 void
 kmem_init(void)
 {
+	for (int i = 0; i < KMEM_POOL_ALLOCATORS; i++) {
+		kmem_pool_allocators[i].pa_alloc = kmem_page_alloc;
+		kmem_pool_allocators[i].pa_free = kmem_page_free;
+		kmem_pool_allocators[i].pa_pagesz = PAGE_SIZE * (i + 1);
+	}
+
 #ifdef KMEM_GUARD
 	uvm_kmguard_init(&kmem_guard, &kmem_guard_depth, &kmem_guard_size,
 	    kmem_va_arena);
 #endif
-	kmem_create_caches(kmem_cache_sizes, kmem_cache, KMEM_MAXSIZE);
+	kmem_cache_maxidx = kmem_create_caches(kmem_cache_sizes,
+	    kmem_cache, KMEM_MAXSIZE, KMEM_SHIFT);
+	kmem_cache_big_maxidx = kmem_create_caches(kmem_cache_big_sizes,
+	    kmem_cache_big, KMEM_POOL_ALLOCATORS * PAGE_SIZE, KMEM_BIG_SHIFT);
 }
 
 size_t
@@ -354,6 +421,27 @@ kmem_roundup_size(size_t size)
 	return (size + (KMEM_ALIGN - 1)) & ~(KMEM_ALIGN - 1);
 }
 
+void *
+kmem_page_alloc(struct pool *pp, int flags)
+{
+	const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP;
+	vmem_addr_t va;
+	int ret;
+
+	ret = uvm_km_kmem_alloc(kmem_va_arena, pp->pr_alloc->pa_pagesz,
+	    vflags | VM_INSTANTFIT, &va);
+
+	return ret ? NULL : (void *)va;
+}
+
+void
+kmem_page_free(struct pool *pp, void *v)
+{
+
+	uvm_km_kmem_free(kmem_va_arena, (vaddr_t)v, pp->pr_alloc->pa_pagesz);
+}
+
+
 /* ---- debug */
 
 #if defined(KMEM_POISON)
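To make the wastage argument concrete, here is a small userland reimplementation of the pool-allocator selection heuristic from kmem_create_caches() above. It assumes PAGE_SIZE is 4096 and is for illustration only; the cache sizes and the 128-byte hysteresis are taken from the patch, the rest is scaffolding:

/*
 * Illustration only, not part of the patch: reproduces the
 * "most efficient pool_allocator" selection from kmem_create_caches()
 * to show which pool page size each big cache ends up with.
 */
#include <stdio.h>

#define PAGE_SIZE		4096	/* assumed */
#define KMEM_POOL_ALLOCATORS	4

int
main(void)
{
	int cache_sizes[] = { 2048, 3072, 4096, 6144, 8192, 12288, 16384 };

	for (size_t i = 0; i < sizeof(cache_sizes) / sizeof(cache_sizes[0]); i++) {
		int cache_size = cache_sizes[i];
		int pagesz = PAGE_SIZE;
		int wastage = pagesz - (pagesz / cache_size) * cache_size;

		/* switch to a larger pool page only if it saves more
		   than 128 bytes per page, as in the patch */
		for (int pai = 1; pai < KMEM_POOL_ALLOCATORS; pai++) {
			int npagesz = PAGE_SIZE * (pai + 1);
			int nwastage = npagesz -
			    (npagesz / cache_size) * cache_size;

			if (nwastage + 128 < wastage) {
				pagesz = npagesz;
				wastage = nwastage;
			}
		}
		printf("kmem-%d: %d byte pool pages, %d bytes wasted per page\n",
		    cache_size, pagesz, wastage);
	}
	return 0;
}

With 4096-byte pages this puts kmem-3072, kmem-6144 and kmem-12288 on 12288-byte pool pages with zero wastage (kmem-3072 would waste 1024 bytes per page otherwise), while kmem-2048, kmem-4096, kmem-8192 and kmem-16384 stay on the smallest pool page they already fill exactly.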