On Tue, Apr 26, 2011 at 09:13:47PM +0200, Otto Moerbeek wrote:

> On Tue, Apr 26, 2011 at 10:09:23AM -0400, Ted Unangst wrote:
> 
> > On Tue, Apr 26, 2011 at 9:33 AM, Otto Moerbeek <[email protected]> wrote:
> > > This diff implements a tradeoff to gain speed at the cost of reducing
> > > the randomness of chunk allocation in malloc slightly.
> > >
> > > The idea is only to randomize the first half of chunks in a page. The
> > > second half of chunks will fill in the gaps in-order. The
> > > effectiveness of the current randomization decreases already with the
> > > number of free slots diminishing in a page.
> > >
> > > In one test case, this diff reduced the runtime from 31s to 25s. I'm
> > > not completely sure if the reduced randomness is acceptable. But if
> > 
> > Perhaps a quarter?  We want to prevent adjacent consecutive
> > allocations, which is still very likely at the half way point, but
> > diminishes after that.
> 
> Yes, that might be better, though you some of the performance gain is
> lost because you are scanning a lot of bits: i free bits + all bits in
> between that are not free. If a chunk page is pretty full, that's a
> lot of bits before you find the i'th free chunk. 
> 
> Originally I though most of the time was lost getting the random bits,
> but now it seems the scanning of the bits is to blame. Unless I'm
> misinterpreting my data....
> 
>       -Otto

Second version of diff. This is a conservative one, i.e. it does not
change randomization in any way. The diff achieves a speedup by:

- Move from long units to short, making the test for a complete non-free
unit more effective.

- In the randomization loop, also consider whole units instead of always
doing it bit by bit. 


        -Otto

Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.127
diff -u -p -r1.127 malloc.c
--- malloc.c    16 Dec 2010 18:47:01 -0000      1.127
+++ malloc.c    27 Apr 2011 07:43:43 -0000
@@ -138,7 +138,7 @@ struct dir_info {
  *
  * How many bits per u_long in the bitmap
  */
-#define MALLOC_BITS            (NBBY * sizeof(u_long))
+#define MALLOC_BITS            (NBBY * sizeof(u_short))
 struct chunk_info {
        LIST_ENTRY(chunk_info) entries;
        void *page;                     /* pointer to the page */
@@ -148,7 +148,7 @@ struct chunk_info {
        u_short free;                   /* how many free chunks */
        u_short total;                  /* how many chunk */
                                        /* which chunks are free */
-       u_long bits[(MALLOC_PAGESIZE / MALLOC_MINSIZE) / MALLOC_BITS];
+       u_short bits[(MALLOC_PAGESIZE / MALLOC_MINSIZE) / MALLOC_BITS];
 };
 
 struct malloc_readonly {
@@ -958,10 +958,10 @@ omalloc_make_chunks(struct dir_info *d, 
 
        /* Do a bunch at a time */
        for (; (k - i) >= MALLOC_BITS; i += MALLOC_BITS)
-               bp->bits[i / MALLOC_BITS] = ~0UL;
+               bp->bits[i / MALLOC_BITS] = (u_short)~0U;
 
        for (; i < k; i++)
-               bp->bits[i / MALLOC_BITS] |= 1UL << (i % MALLOC_BITS);
+               bp->bits[i / MALLOC_BITS] |= (u_short)1U << (i % MALLOC_BITS);
 
        LIST_INSERT_HEAD(&d->chunk_dir[bits], bp, entries);
 
@@ -982,7 +982,7 @@ malloc_bytes(struct dir_info *d, size_t 
 {
        int             i, j;
        size_t          k;
-       u_long          u, *lp;
+       u_short         u, *lp;
        struct chunk_info *bp;
 
        if (mopts.malloc_canary != (d->canary1 ^ (u_int32_t)(uintptr_t)d) ||
@@ -1026,22 +1026,26 @@ malloc_bytes(struct dir_info *d, size_t 
        }
 
        /* advance a random # of positions */
-       i = getrnibble() % bp->free;
-       while (i > 0) {
-               u += u;
-               k++;
-               if (k >= MALLOC_BITS) {
-                       lp++;
-                       u = 1;
-                       k = 0;
-               }
-               if (lp - bp->bits > (bp->total - 1) / MALLOC_BITS) {
-                       wrterror("chunk overflow", NULL);
-                       errno = EFAULT;
-                       return (NULL);
+       if (bp->free > 1) {
+               i = getrnibble() % bp->free;
+               while (i > 0) {
+                       u += u;
+                       k++;
+                       if (k >= MALLOC_BITS) {
+                               while (!*++lp)
+                                       /* EMPTY */;
+                               u = 1;
+                               k = 0;
+                               if (lp - bp->bits > (bp->total - 1) /
+                                   MALLOC_BITS) {
+                                       wrterror("chunk overflow", NULL);
+                                       errno = EFAULT;
+                                       return (NULL);
+                               }
+                       }
+                       if (*lp & u)
+                               i--;
                }
-               if (*lp & u)
-                       i--;
        }
 
        *lp ^= u;

Reply via email to