On Wed, Dec 02, 2009 at 10:38:10AM +0100, Otto Moerbeek wrote:
> Hi,
>
> apart from the random page addresses obtained from mmap(2), malloc(3)
> itself also randomizes cache and chunk operations. It uses a nibble of
> randomness per call, so optimize that to not waste half the random
> bits.
>
> Please test, should be a bit faster.
Anybody?
-Otto
>
> Index: malloc.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
> retrieving revision 1.121
> diff -u -p -r1.121 malloc.c
> --- malloc.c 27 Nov 2009 20:11:01 -0000 1.121
> +++ malloc.c 30 Nov 2009 19:40:47 -0000
> @@ -64,7 +64,7 @@
>
> #define MALLOC_MAXCHUNK (1 << (MALLOC_PAGESHIFT-1))
> #define MALLOC_MAXCACHE 256
> -#define MALLOC_DELAYED_CHUNKS 16 /* should be power of 2 */
> +#define MALLOC_DELAYED_CHUNKS 15 /* max of getrnibble() */
> /*
> * When the P option is active, we move allocations between half a page
> * and a whole page towards the end, subject to alignment constraints.
> @@ -110,7 +110,7 @@ struct dir_info {
> /* free pages cache */
> struct region_info free_regions[MALLOC_MAXCACHE];
> /* delayed free chunk slots */
> - void *delayed_chunks[MALLOC_DELAYED_CHUNKS];
> + void *delayed_chunks[MALLOC_DELAYED_CHUNKS + 1];
> #ifdef MALLOC_STATS
> size_t inserts;
> size_t insert_collisions;
> @@ -183,9 +183,9 @@ static int malloc_active; /* status of
> static size_t malloc_guarded; /* bytes used for guards */
> static size_t malloc_used; /* bytes allocated */
>
> -static size_t rbytesused; /* random bytes used */
> +static size_t rnibblesused; /* random nibbles used */
> static u_char rbytes[512]; /* random bytes */
> -static u_char getrbyte(void);
> +static u_char getrnibble(void);
>
> extern char *__progname;
>
> @@ -378,6 +378,26 @@ wrterror(char *p)
> abort();
> }
>
> +static void
> +rbytes_init(void)
> +{
> + arc4random_buf(rbytes, sizeof(rbytes));
> + rnibblesused = 0;
> +}
> +
> +static inline u_char
> +getrnibble(void)
> +{
> + u_char x;
> +
> + if (rnibblesused >= 2 * sizeof(rbytes))
> + rbytes_init();
> + x = rnibblesused < sizeof(rbytes) ? (rbytes[rnibblesused] & 0xf) :
> + (rbytes[rnibblesused - sizeof(rbytes)] >> 4);
> + rnibblesused++;
> + return x;
> +}
> +
> /*
> * Cache maintenance. We keep at most malloc_cache pages cached.
> * If the cache is becoming full, unmap pages in the cache for real,
> @@ -408,7 +428,7 @@ unmap(struct dir_info *d, void *p, size_
> rsz = mopts.malloc_cache - d->free_regions_size;
> if (psz > rsz)
> tounmap = psz - rsz;
> - offset = getrbyte();
> + offset = getrnibble();
> for (i = 0; tounmap > 0 && i < mopts.malloc_cache; i++) {
> r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
> if (r->p != NULL) {
> @@ -489,7 +509,7 @@ map(struct dir_info *d, size_t sz, int z
> /* zero fill not needed */
> return p;
> }
> - offset = getrbyte();
> + offset = getrnibble();
> for (i = 0; i < mopts.malloc_cache; i++) {
> r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
> if (r->p != NULL) {
> @@ -536,21 +556,6 @@ map(struct dir_info *d, size_t sz, int z
> return p;
> }
>
> -static void
> -rbytes_init(void)
> -{
> - arc4random_buf(rbytes, sizeof(rbytes));
> - rbytesused = 0;
> -}
> -
> -static u_char
> -getrbyte(void)
> -{
> - if (rbytesused >= sizeof(rbytes))
> - rbytes_init();
> - return rbytes[rbytesused++];
> -}
> -
> /*
> * Initialize a dir_info, which should have been cleared by caller
> */
> @@ -1010,7 +1015,7 @@ malloc_bytes(struct dir_info *d, size_t
> }
>
> /* advance a random # of positions */
> - i = (getrbyte() & (MALLOC_DELAYED_CHUNKS - 1)) % bp->free;
> + i = getrnibble() % bp->free;
> while (i > 0) {
> u += u;
> k++;
> @@ -1273,7 +1278,7 @@ ofree(void *p)
> if (mopts.malloc_junk && sz > 0)
> memset(p, SOME_FREEJUNK, sz);
> if (!mopts.malloc_freeprot) {
> - i = getrbyte() & (MALLOC_DELAYED_CHUNKS - 1);
> + i = getrnibble();
> tmp = p;
> p = g_pool->delayed_chunks[i];
> g_pool->delayed_chunks[i] = tmp;