This uses a hash table to maintain a set of delayed allocations, allowing full and efficient double-free detection. The current code can only catch it when the two pointers being swapped are equal, so double-frees that could be caught are missed. A naive loop over every delayed chunk would work fine with the current 16 slot array, but that would make scaling up the number of delayed allocations much more costly.
I'm using this with 2 other changes making the size configurable and adding a FIFO ring buffer in front of the randomized array of the same size, to get a minimum guaranteed delay more than the current 1 free cycle. The randomization still works fine and it ends up providing a nice balance. It's essentially equivalent to the quarantine feature in Valgrind/ASan, but suitable for production and with weaker use-after-free detection. It mixes well with the earlier change to detect write-after-free via junk validation here. diff --git a/stdlib/malloc.c b/stdlib/malloc.c index bc328d2..9e8bd16 100644 --- a/stdlib/malloc.c +++ b/stdlib/malloc.c @@ -118,6 +118,7 @@ struct dir_info { struct region_info free_regions[MALLOC_MAXCACHE]; /* delayed free chunk slots */ void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1]; + void *delayed_chunks_set[(MALLOC_DELAYED_CHUNK_MASK + 1) * 2]; size_t rbytesused; /* random bytes used */ char *func; /* current function */ u_char rbytes[32]; /* random bytes */ @@ -249,6 +250,22 @@ hash(void *p) return sum; } +static inline size_t +hash_chunk(void *p) +{ + size_t sum; + uintptr_t u; + + u = (uintptr_t)p >> MALLOC_MINSHIFT; + sum = u; + sum = (sum << 7) - sum + (u >> 16); +#ifdef __LP64__ + sum = (sum << 7) - sum + (u >> 32); + sum = (sum << 7) - sum + (u >> 48); +#endif + return sum; +} + static void wrterror(struct dir_info *d, char *msg, void *p) { @@ -864,6 +881,58 @@ delete(struct dir_info *d, struct region_info *ri) } } +void delayed_chunks_insert(struct dir_info *d, void *p) { + size_t index; + size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1; + void *q; + + index = hash_chunk(p) & mask; + q = d->delayed_chunks_set[index]; + while (q != NULL) { + if (p == q) { + wrterror(d, "double free", p); + return; + } + index = (index - 1) & mask; + q = d->delayed_chunks_set[index]; + } + d->delayed_chunks_set[index] = p; +} + +void delayed_chunks_delete(struct dir_info *d, void *p) { + size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1; + size_t i, j, r; + void *q; + + i = hash_chunk(p) & mask; + q = d->delayed_chunks_set[i]; + while (q != p) { + if (q == NULL) { + wrterror(d, "pointer missing from address tracking table", p); + return; + } + i = (i - 1) & mask; + q = d->delayed_chunks_set[i]; + } + + for (;;) { + d->delayed_chunks_set[i] = NULL; + j = i; + for (;;) { + i = (i - 1) & mask; + if (d->delayed_chunks_set[i] == NULL) + return; + r = hash_chunk(d->delayed_chunks_set[i]) & mask; + if ((i <= r && r < j) || (r < j && j < i) || + (j < i && i <= r)) + continue; + d->delayed_chunks_set[j] = d->delayed_chunks_set[i]; + break; + } + } +} + + /* * Allocate a page of chunks */ @@ -1315,13 +1384,21 @@ ofree(struct dir_info *pool, void *p) if (!mopts.malloc_freenow) { if (find_chunknum(pool, r, p) == -1) return; + + if (p == NULL) + return; + + delayed_chunks_insert(pool, p); + i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK; tmp = p; p = pool->delayed_chunks[i]; - if (tmp == p) { - wrterror(pool, "double free", p); + + if (p == NULL) return; - } + + delayed_chunks_delete(pool, p); + if (mopts.malloc_junk) validate_junk(pool, p); pool->delayed_chunks[i] = tmp; @@ -1950,6 +2027,7 @@ malloc_dump(int fd) wrterror(pool, "bogus pointer in malloc_dump", p); continue; } + delayed_chunks_delete(pool, p); free_bytes(pool, r, p); pool->delayed_chunks[i] = NULL; }