Hi,
a few micro-optimizations, including getting rid of some statistics
that are not actually very interesting.
Speedup amounts to a few tenths of a percent up to a few percent,
depending on how biased the benchmark is.
-Otto
Index: stdlib/malloc.c
===================================================================
RCS file: /home/cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.291
diff -u -p -r1.291 malloc.c
--- stdlib/malloc.c 22 Oct 2023 12:19:26 -0000 1.291
+++ stdlib/malloc.c 24 Oct 2023 14:05:37 -0000
@@ -169,16 +169,12 @@ struct dir_info {
void *caller;
size_t inserts;
size_t insert_collisions;
- size_t finds;
- size_t find_collisions;
size_t deletes;
size_t delete_moves;
size_t cheap_realloc_tries;
size_t cheap_reallocs;
size_t malloc_used; /* bytes allocated */
size_t malloc_guarded; /* bytes used for guards */
- size_t pool_searches; /* searches for pool */
- size_t other_pool; /* searches in other pool */
#define STATS_ADD(x,y) ((x) += (y))
#define STATS_SUB(x,y) ((x) -= (y))
#define STATS_INC(x) ((x)++)
@@ -209,12 +205,14 @@ static void unmap(struct dir_info *d, vo
struct chunk_info {
LIST_ENTRY(chunk_info) entries;
void *page; /* pointer to the page */
+ /* number of shorts should add up to 8, check alloc_chunk_info() */
u_short canary;
u_short bucket;
u_short free; /* how many free chunks */
u_short total; /* how many chunks */
u_short offset; /* requested size table offset */
- u_short bits[1]; /* which chunks are free */
+#define CHUNK_INFO_TAIL 3
+ u_short bits[CHUNK_INFO_TAIL]; /* which chunks are free */
};
#define CHUNK_FREE(i, n) ((i)->bits[(n) / MALLOC_BITS] & (1U << ((n) % MALLOC_BITS)))
@@ -656,12 +654,10 @@ find(struct dir_info *d, void *p)
index = hash(p) & mask;
r = d->r[index].p;
q = MASK_POINTER(r);
- STATS_INC(d->finds);
while (q != p && r != NULL) {
index = (index - 1) & mask;
r = d->r[index].p;
q = MASK_POINTER(r);
- STATS_INC(d->find_collisions);
}
return (q == p && r != NULL) ? &d->r[index] : NULL;
}
@@ -949,7 +945,7 @@ init_chunk_info(struct dir_info *d, stru
p->bucket = bucket;
p->total = p->free = MALLOC_PAGESIZE / B2ALLOC(bucket);
- p->offset = bucket == 0 ? 0xdead : howmany(p->total, MALLOC_BITS);
+ p->offset = howmany(p->total, MALLOC_BITS);
p->canary = (u_short)d->canary1;
/* set all valid bits in the bitmap */
@@ -971,8 +967,13 @@ alloc_chunk_info(struct dir_info *d, u_i
count = MALLOC_PAGESIZE / B2ALLOC(bucket);
size = howmany(count, MALLOC_BITS);
- size = sizeof(struct chunk_info) + (size - 1) * sizeof(u_short);
- if (mopts.chunk_canaries)
+ /* see declaration of struct chunk_info */
+ if (size <= CHUNK_INFO_TAIL)
+ size = 0;
+ else
+ size -= CHUNK_INFO_TAIL;
+ size = sizeof(struct chunk_info) + size * sizeof(u_short);
+ if (mopts.chunk_canaries && bucket > 0)
size += count * sizeof(u_short);
size = _ALIGN(size);
count = MALLOC_PAGESIZE / size;
@@ -1129,8 +1130,7 @@ fill_canary(char *ptr, size_t sz, size_t
static void *
malloc_bytes(struct dir_info *d, size_t size)
{
- u_int i, r, bucket, listnum;
- size_t k;
+ u_int i, k, r, bucket, listnum;
u_short *lp;
struct chunk_info *bp;
void *p;
@@ -1170,7 +1170,7 @@ malloc_bytes(struct dir_info *d, size_t
/* no bit halfway, go to next full short */
i /= MALLOC_BITS;
for (;;) {
- if (++i >= howmany(bp->total, MALLOC_BITS))
+ if (++i >= bp->offset)
i = 0;
lp = &bp->bits[i];
if (*lp) {
@@ -1228,7 +1228,7 @@ validate_canary(struct dir_info *d, u_ch
}
}
-static uint32_t
+static inline uint32_t
find_chunknum(struct dir_info *d, struct chunk_info *info, void *ptr, int check)
{
uint32_t chunknum;
@@ -1532,12 +1532,10 @@ findpool(void *p, struct dir_info *argpo
struct dir_info *pool = argpool;
struct region_info *r = find(pool, p);
- STATS_INC(pool->pool_searches);
if (r == NULL) {
u_int i, nmutexes;
nmutexes = mopts.malloc_pool[1]->malloc_mt ?
mopts.malloc_mutexes : 2;
- STATS_INC(pool->other_pool);
for (i = 1; i < nmutexes; i++) {
u_int j = (argpool->mutex + i) & (nmutexes - 1);
@@ -2581,13 +2579,10 @@ malloc_dump1(int poolno, struct dir_info
d->mmap_flag);
ulog("Region slots free %zu/%zu\n",
d->regions_free, d->regions_total);
- ulog("Finds %zu/%zu\n", d->finds, d->find_collisions);
ulog("Inserts %zu/%zu\n", d->inserts, d->insert_collisions);
ulog("Deletes %zu/%zu\n", d->deletes, d->delete_moves);
ulog("Cheap reallocs %zu/%zu\n",
d->cheap_reallocs, d->cheap_realloc_tries);
- ulog("Other pool searches %zu/%zu\n",
- d->other_pool, d->pool_searches);
ulog("In use %zu\n", d->malloc_used);
ulog("Guarded %zu\n", d->malloc_guarded);
dump_free_chunk_info(d, leaks);