Hi,

a few micro-optimizations, including getting rid of some statistics
that are not actually very interesting.

The speedup ranges from a few tenths of a percent to a few percent,
depending on how biased the benchmark is.
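
To illustrate the chunk_info part for readers skimming the diff: the
bitmap tail in the struct grows from 1 to CHUNK_INFO_TAIL shorts so
that the shorts add up to 8 (16 bytes), and alloc_chunk_info() then
only allocates bitmap space beyond what the struct already provides.
Below is a minimal compilable sketch of that size computation;
struct chunk_info_hdr and chunk_info_size() are made-up stand-ins
(the real struct also has list linkage and a page pointer, and the
real function additionally reserves canary space and rounds the
result with _ALIGN()).

#include <sys/param.h>  /* howmany(), NBBY */
#include <sys/types.h>
#include <stdio.h>

#define MALLOC_PAGESIZE (1UL << 12)
#define MALLOC_BITS     (NBBY * sizeof(u_short))  /* 16 bits per short */
#define CHUNK_INFO_TAIL 3

/* 5 header shorts + 3 tail shorts = 8 shorts = 16 bytes, no padding */
struct chunk_info_hdr {
        u_short canary, bucket, free, total, offset;
        u_short bits[CHUNK_INFO_TAIL];
};

static size_t
chunk_info_size(size_t chunk_size)
{
        size_t count = MALLOC_PAGESIZE / chunk_size;
        size_t n = howmany(count, MALLOC_BITS); /* bitmap shorts needed */

        /* the struct itself already provides CHUNK_INFO_TAIL of them */
        n = n <= CHUNK_INFO_TAIL ? 0 : n - CHUNK_INFO_TAIL;
        return sizeof(struct chunk_info_hdr) + n * sizeof(u_short);
}

int
main(void)
{
        /* 16-byte chunks: 256 bits -> 16 shorts -> 13 extra (prints 42) */
        printf("%zu\n", chunk_info_size(16));
        /* 2048-byte chunks: 2 bits fit in the tail, no extra (prints 16) */
        printf("%zu\n", chunk_info_size(2048));
        return 0;
}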

        -Otto

Index: stdlib/malloc.c
===================================================================
RCS file: /home/cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.291
diff -u -p -r1.291 malloc.c
--- stdlib/malloc.c     22 Oct 2023 12:19:26 -0000      1.291
+++ stdlib/malloc.c     24 Oct 2023 14:05:37 -0000
@@ -169,16 +169,12 @@ struct dir_info {
        void *caller;
        size_t inserts;
        size_t insert_collisions;
-       size_t finds;
-       size_t find_collisions;
        size_t deletes;
        size_t delete_moves;
        size_t cheap_realloc_tries;
        size_t cheap_reallocs;
        size_t malloc_used;             /* bytes allocated */
        size_t malloc_guarded;          /* bytes used for guards */
-       size_t pool_searches;           /* searches for pool */
-       size_t other_pool;              /* searches in other pool */
 #define STATS_ADD(x,y) ((x) += (y))
 #define STATS_SUB(x,y) ((x) -= (y))
 #define STATS_INC(x)   ((x)++)
@@ -209,12 +205,14 @@ static void unmap(struct dir_info *d, vo
 struct chunk_info {
        LIST_ENTRY(chunk_info) entries;
        void *page;                     /* pointer to the page */
+       /* number of shorts should add up to 8, check alloc_chunk_info() */
        u_short canary;
        u_short bucket;
        u_short free;                   /* how many free chunks */
        u_short total;                  /* how many chunks */
        u_short offset;                 /* requested size table offset */
-       u_short bits[1];                /* which chunks are free */
+#define CHUNK_INFO_TAIL                        3
+       u_short bits[CHUNK_INFO_TAIL];  /* which chunks are free */
 };
 
 #define CHUNK_FREE(i, n)       ((i)->bits[(n) / MALLOC_BITS] & (1U << ((n) % MALLOC_BITS)))
@@ -656,12 +654,10 @@ find(struct dir_info *d, void *p)
        index = hash(p) & mask;
        r = d->r[index].p;
        q = MASK_POINTER(r);
-       STATS_INC(d->finds);
        while (q != p && r != NULL) {
                index = (index - 1) & mask;
                r = d->r[index].p;
                q = MASK_POINTER(r);
-               STATS_INC(d->find_collisions);
        }
        return (q == p && r != NULL) ? &d->r[index] : NULL;
 }
@@ -949,7 +945,7 @@ init_chunk_info(struct dir_info *d, stru
 
        p->bucket = bucket;
        p->total = p->free = MALLOC_PAGESIZE / B2ALLOC(bucket);
-       p->offset = bucket == 0 ? 0xdead : howmany(p->total, MALLOC_BITS);
+       p->offset = howmany(p->total, MALLOC_BITS);
        p->canary = (u_short)d->canary1;
 
        /* set all valid bits in the bitmap */
@@ -971,8 +967,13 @@ alloc_chunk_info(struct dir_info *d, u_i
                count = MALLOC_PAGESIZE / B2ALLOC(bucket);
 
                size = howmany(count, MALLOC_BITS);
-               size = sizeof(struct chunk_info) + (size - 1) * sizeof(u_short);
-               if (mopts.chunk_canaries)
+               /* see declaration of struct chunk_info */
+               if (size <= CHUNK_INFO_TAIL)
+                       size = 0;
+               else
+                       size -= CHUNK_INFO_TAIL;
+               size = sizeof(struct chunk_info) + size * sizeof(u_short);
+               if (mopts.chunk_canaries && bucket > 0)
                        size += count * sizeof(u_short);
                size = _ALIGN(size);
                count = MALLOC_PAGESIZE / size;
@@ -1129,8 +1130,7 @@ fill_canary(char *ptr, size_t sz, size_t
 static void *
 malloc_bytes(struct dir_info *d, size_t size)
 {
-       u_int i, r, bucket, listnum;
-       size_t k;
+       u_int i, k, r, bucket, listnum;
        u_short *lp;
        struct chunk_info *bp;
        void *p;
@@ -1170,7 +1170,7 @@ malloc_bytes(struct dir_info *d, size_t 
        /* no bit halfway, go to next full short */
        i /= MALLOC_BITS;
        for (;;) {
-               if (++i >= howmany(bp->total, MALLOC_BITS))
+               if (++i >= bp->offset)
                        i = 0;
                lp = &bp->bits[i];
                if (*lp) {
@@ -1228,7 +1228,7 @@ validate_canary(struct dir_info *d, u_ch
        }
 }
 
-static uint32_t
+static inline uint32_t
 find_chunknum(struct dir_info *d, struct chunk_info *info, void *ptr, int check)
 {
        uint32_t chunknum;
@@ -1532,12 +1532,10 @@ findpool(void *p, struct dir_info *argpo
        struct dir_info *pool = argpool;
        struct region_info *r = find(pool, p);
 
-       STATS_INC(pool->pool_searches);
        if (r == NULL) {
                u_int i, nmutexes;
 
                nmutexes = mopts.malloc_pool[1]->malloc_mt ? mopts.malloc_mutexes : 2;
-               STATS_INC(pool->other_pool);
                for (i = 1; i < nmutexes; i++) {
                        u_int j = (argpool->mutex + i) & (nmutexes - 1);
 
@@ -2581,13 +2579,10 @@ malloc_dump1(int poolno, struct dir_info
                    d->mmap_flag);
                ulog("Region slots free %zu/%zu\n",
                        d->regions_free, d->regions_total);
-               ulog("Finds %zu/%zu\n", d->finds, d->find_collisions);
                ulog("Inserts %zu/%zu\n", d->inserts, d->insert_collisions);
                ulog("Deletes %zu/%zu\n", d->deletes, d->delete_moves);
                ulog("Cheap reallocs %zu/%zu\n",
                    d->cheap_reallocs, d->cheap_realloc_tries);
-               ulog("Other pool searches %zu/%zu\n",
-                   d->other_pool, d->pool_searches);
                ulog("In use %zu\n", d->malloc_used);
                ulog("Guarded %zu\n", d->malloc_guarded);
                dump_free_chunk_info(d, leaks);
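
For context on the malloc_bytes() hunk: offset now always caches
howmany(total, MALLOC_BITS), so the wrap test in the free-chunk scan
becomes a plain load instead of a division per iteration.  A rough
standalone rendering of that scan follows; scan_bits() and its
arguments are simplified stand-ins (nshorts plays the role of
bp->offset), and the caller is assumed to have verified that at least
one bit is set, as the real code does via bp->free.

#include <sys/param.h>  /* NBBY */
#include <sys/types.h>
#include <strings.h>    /* ffs() */
#include <stdio.h>

#define MALLOC_BITS     (NBBY * sizeof(u_short))

/*
 * Scan a bitmap of nshorts u_shorts for a set bit, starting at the
 * short *after* index i and wrapping around; returns the bit number.
 * Mirrors the "no bit halfway, go to next full short" loop.
 */
static u_int
scan_bits(const u_short *bits, u_int nshorts, u_int i)
{
        for (;;) {
                if (++i >= nshorts)     /* cached count, no division */
                        i = 0;
                if (bits[i] != 0)
                        return i * MALLOC_BITS + (ffs(bits[i]) - 1);
        }
}

int
main(void)
{
        u_short bits[4] = { 0, 0, 0x0100, 0 };

        /* bit 8 of short 2 -> chunk number 40 */
        printf("%u\n", scan_bits(bits, 4, 0));
        return 0;
}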
