On Mon, Dec 14, 2009 at 01:58:34PM +0100, Otto Moerbeek wrote:
> On Mon, Dec 14, 2009 at 01:54:50PM +0100, Otto Moerbeek wrote:
>
> > On Mon, Dec 14, 2009 at 07:44:31AM -0500, Ted Unangst wrote:
> >
> > > On Mon, Dec 14, 2009 at 3:58 AM, Otto Moerbeek <[email protected]> wrote:
> > > >> apart from the random page addresses obtained from mmap(2) malloc(3)
> > > >> itself also randomizes cache and chunk operations. It uses a nibble of
> > > >> randomness per call, so optimize that to not waste half the random
> > > >> bits.
> > > >>
> > > >> Please test, should be a bit faster.
> > >
> > > I don't really like this. It looks complicated (hard to judge). arc4
> > > is fast enough I don't think we need to worry about conserving bits,
> > > and I'd be surprised if it actually were faster considering how much
> > > work you're doing per nibble compared to the work to just run arc4.
> > > However fast it may be, I feel much safer with the current code.
> >
> > Every call to arc4random_buf() causes a getpid() call, which is a
> > syscall with all the overhead. This saves half of those.
>
> Hit "send" too fast....
>
> Yes, it is a bit more complicated, but I think not too much.
>
> -Otto
This is an alternative version from guenther@, which is a bit simpler
and generates smaller code (as seen on i386). I intend to commit this
soon, the numbers from Joachim Schipper show a small, but significant
speedup.
-Otto
Index: malloc.c
===================================================================
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.122
diff -u -p -r1.122 malloc.c
--- malloc.c 7 Dec 2009 18:47:38 -0000 1.122
+++ malloc.c 15 Dec 2009 19:15:33 -0000
@@ -66,7 +66,7 @@
#define MALLOC_MAXCHUNK (1 << (MALLOC_PAGESHIFT-1))
#define MALLOC_MAXCACHE 256
-#define MALLOC_DELAYED_CHUNKS 16 /* should be power of 2 */
+#define MALLOC_DELAYED_CHUNKS 15 /* max of getrnibble() */
/*
* When the P option is active, we move allocations between half a page
* and a whole page towards the end, subject to alignment constraints.
@@ -112,7 +112,7 @@ struct dir_info {
/* free pages cache */
struct region_info free_regions[MALLOC_MAXCACHE];
/* delayed free chunk slots */
- void *delayed_chunks[MALLOC_DELAYED_CHUNKS];
+ void *delayed_chunks[MALLOC_DELAYED_CHUNKS + 1];
#ifdef MALLOC_STATS
size_t inserts;
size_t insert_collisions;
@@ -185,9 +185,9 @@ static int malloc_active; /* status of
static size_t malloc_guarded; /* bytes used for guards */
static size_t malloc_used; /* bytes allocated */
-static size_t rbytesused; /* random bytes used */
+static size_t rnibblesused; /* random nibbles used */
static u_char rbytes[512]; /* random bytes */
-static u_char getrbyte(void);
+static u_char getrnibble(void);
extern char *__progname;
@@ -380,6 +380,24 @@ wrterror(char *p)
abort();
}
+static void
+rbytes_init(void)
+{
+ arc4random_buf(rbytes, sizeof(rbytes));
+ rnibblesused = 0;
+}
+
+static inline u_char
+getrnibble(void)
+{
+ u_char x;
+
+ if (rnibblesused >= 2 * sizeof(rbytes))
+ rbytes_init();
+ x = rbytes[rnibblesused++ / 2];
+ return (rnibblesused & 1 ? x & 0xf : x >> 4);
+}
+
/*
* Cache maintenance. We keep at most malloc_cache pages cached.
* If the cache is becoming full, unmap pages in the cache for real,
@@ -410,7 +428,7 @@ unmap(struct dir_info *d, void *p, size_
rsz = mopts.malloc_cache - d->free_regions_size;
if (psz > rsz)
tounmap = psz - rsz;
- offset = getrbyte();
+ offset = getrnibble();
for (i = 0; tounmap > 0 && i < mopts.malloc_cache; i++) {
r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
if (r->p != NULL) {
@@ -491,7 +509,7 @@ map(struct dir_info *d, size_t sz, int z
/* zero fill not needed */
return p;
}
- offset = getrbyte();
+ offset = getrnibble();
for (i = 0; i < mopts.malloc_cache; i++) {
r = &d->free_regions[(i + offset) & (mopts.malloc_cache - 1)];
if (r->p != NULL) {
@@ -538,21 +556,6 @@ map(struct dir_info *d, size_t sz, int z
return p;
}
-static void
-rbytes_init(void)
-{
- arc4random_buf(rbytes, sizeof(rbytes));
- rbytesused = 0;
-}
-
-static u_char
-getrbyte(void)
-{
- if (rbytesused >= sizeof(rbytes))
- rbytes_init();
- return rbytes[rbytesused++];
-}
-
/*
* Initialize a dir_info, which should have been cleared by caller
*/
@@ -1012,7 +1015,7 @@ malloc_bytes(struct dir_info *d, size_t
}
/* advance a random # of positions */
- i = (getrbyte() & (MALLOC_DELAYED_CHUNKS - 1)) % bp->free;
+ i = getrnibble() % bp->free;
while (i > 0) {
u += u;
k++;
@@ -1275,7 +1278,7 @@ ofree(void *p)
if (mopts.malloc_junk && sz > 0)
memset(p, SOME_FREEJUNK, sz);
if (!mopts.malloc_freeprot) {
- i = getrbyte() & (MALLOC_DELAYED_CHUNKS - 1);
+ i = getrnibble();
tmp = p;
p = g_pool->delayed_chunks[i];
g_pool->delayed_chunks[i] = tmp;