On Tue, Apr 26, 2011 at 09:13:47PM +0200, Otto Moerbeek wrote:
> On Tue, Apr 26, 2011 at 10:09:23AM -0400, Ted Unangst wrote:
>
> > On Tue, Apr 26, 2011 at 9:33 AM, Otto Moerbeek <[email protected]> wrote:
> > > This diff implements a tradeoff to gain speed at the cost of reducing
> > > the randomness of chunk allocation in malloc slightly.
> > >
> > > The idea is only to randomize the first half of chunks in a page. The
> > > second half of chunks will fill in the gaps in-order. The
> > > effectiveness of the current randomization already decreases as the
> > > number of free slots in a page diminishes.
> > >
> > > In one test case, this diff reduced the runtime from 31s to 25s. I'm
> > > not completely sure if the reduced randomness is acceptable. But if
> >
> > Perhaps a quarter? We want to prevent adjacent consecutive
> > allocations, which is still very likely at the half way point, but
> > diminishes after that.
>
> Yes, that might be better, though some of the performance gain is
> lost because you are scanning a lot of bits: i free bits + all bits in
> between that are not free. If a chunk page is pretty full, that's a
> lot of bits before you find the i'th free chunk.
>
> Originally I thought most of the time was lost getting the random bits,
> but now it seems the scanning of the bits is to blame. Unless I'm
> misinterpreting my data....
>
> -Otto
Second version of diff. This is a conservative one, i.e. it does not
change randomization in any way. The diff achieves a speedup by:
- Move from long units to short, making the test for a complete non-free
unit more effective.
- In the randomization loop, also consider whole units instead of always
doing it bit by bit.
-Otto
Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.127
diff -u -p -r1.127 malloc.c
--- malloc.c 16 Dec 2010 18:47:01 -0000 1.127
+++ malloc.c 27 Apr 2011 07:43:43 -0000
@@ -138,7 +138,7 @@ struct dir_info {
*
* How many bits per u_long in the bitmap
*/
-#define MALLOC_BITS (NBBY * sizeof(u_long))
+#define MALLOC_BITS (NBBY * sizeof(u_short))
struct chunk_info {
LIST_ENTRY(chunk_info) entries;
void *page; /* pointer to the page */
@@ -148,7 +148,7 @@ struct chunk_info {
u_short free; /* how many free chunks */
u_short total; /* how many chunk */
/* which chunks are free */
- u_long bits[(MALLOC_PAGESIZE / MALLOC_MINSIZE) / MALLOC_BITS];
+ u_short bits[(MALLOC_PAGESIZE / MALLOC_MINSIZE) / MALLOC_BITS];
};
struct malloc_readonly {
@@ -958,10 +958,10 @@ omalloc_make_chunks(struct dir_info *d,
/* Do a bunch at a time */
for (; (k - i) >= MALLOC_BITS; i += MALLOC_BITS)
- bp->bits[i / MALLOC_BITS] = ~0UL;
+ bp->bits[i / MALLOC_BITS] = (u_short)~0U;
for (; i < k; i++)
- bp->bits[i / MALLOC_BITS] |= 1UL << (i % MALLOC_BITS);
+ bp->bits[i / MALLOC_BITS] |= (u_short)1U << (i % MALLOC_BITS);
LIST_INSERT_HEAD(&d->chunk_dir[bits], bp, entries);
@@ -982,7 +982,7 @@ malloc_bytes(struct dir_info *d, size_t
{
int i, j;
size_t k;
- u_long u, *lp;
+ u_short u, *lp;
struct chunk_info *bp;
if (mopts.malloc_canary != (d->canary1 ^ (u_int32_t)(uintptr_t)d) ||
@@ -1026,22 +1026,26 @@ malloc_bytes(struct dir_info *d, size_t
}
/* advance a random # of positions */
- i = getrnibble() % bp->free;
- while (i > 0) {
- u += u;
- k++;
- if (k >= MALLOC_BITS) {
- lp++;
- u = 1;
- k = 0;
- }
- if (lp - bp->bits > (bp->total - 1) / MALLOC_BITS) {
- wrterror("chunk overflow", NULL);
- errno = EFAULT;
- return (NULL);
+ if (bp->free > 1) {
+ i = getrnibble() % bp->free;
+ while (i > 0) {
+ u += u;
+ k++;
+ if (k >= MALLOC_BITS) {
+ while (!*++lp)
+ /* EMPTY */;
+ u = 1;
+ k = 0;
+ if (lp - bp->bits > (bp->total - 1) /
+ MALLOC_BITS) {
+ wrterror("chunk overflow", NULL);
+ errno = EFAULT;
+ return (NULL);
+ }
+ }
+ if (*lp & u)
+ i--;
}
- if (*lp & u)
- i--;
}
*lp ^= u;