On Wed, Mar 23, 2016 at 08:00:19AM +0100, Otto Moerbeek wrote: > Hi, > > first diff that seems to work. Tested on amd64 and compile tested on > sparc64. > > It is also available at http://www.drijf.net/openbsd/malloc > > From the README: > > The diff should be applied while in /usr/src/lib, it will patch > both librthreads as well as libc. > > THIS IS WORK IN PROGRESS. It contains multiple things that should > be improved. To name a few things: > > - Currently fixed at 4 pools with a fixed thread -> pool mapping. > - All pools are always initialized, even for single threaded programs, where > only one pool is used. > - Especially realloc gets quite a bit uglier. > - I'm pondering storing the thread -> pool mapping in the thread > struct instead of computing it each time from the tcb address. > > -Otto >
Second diff. Only one person (Stefan Kempf, thanks!) gave feedback... A race condition was fixed in the init code. But there remain race problems in the init code. I will be working on that the coming time. Please be aware that to make this code ready for commit, I need feedback/tests/reviews. There's no way this code will end up in the tree without those. -Otto Index: libc/include/thread_private.h =================================================================== RCS file: /cvs/src/lib/libc/include/thread_private.h,v retrieving revision 1.26 diff -u -p -r1.26 thread_private.h --- libc/include/thread_private.h 7 Apr 2015 01:27:07 -0000 1.26 +++ libc/include/thread_private.h 28 Mar 2016 08:22:31 -0000 @@ -17,6 +17,8 @@ */ extern int __isthreaded; +#define _MALLOC_MUTEXES 4 + /* * Weak symbols are used in libc so that the thread library can * efficiently wrap libc functions. @@ -136,16 +138,16 @@ extern void *__THREAD_NAME(serv_mutex); /* * malloc lock/unlock prototypes and definitions */ -void _thread_malloc_lock(void); -void _thread_malloc_unlock(void); +void _thread_malloc_lock(int); +void _thread_malloc_unlock(int); -#define _MALLOC_LOCK() do { \ +#define _MALLOC_LOCK(n) do { \ if (__isthreaded) \ - _thread_malloc_lock(); \ + _thread_malloc_lock(n); \ } while (0) -#define _MALLOC_UNLOCK() do { \ +#define _MALLOC_UNLOCK(n) do { \ if (__isthreaded) \ - _thread_malloc_unlock();\ + _thread_malloc_unlock(n);\ } while (0) void _thread_atexit_lock(void); Index: libc/stdlib/malloc.c =================================================================== RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v retrieving revision 1.185 diff -u -p -r1.185 malloc.c --- libc/stdlib/malloc.c 17 Mar 2016 17:55:33 -0000 1.185 +++ libc/stdlib/malloc.c 28 Mar 2016 08:22:31 -0000 @@ -1,6 +1,6 @@ /* $OpenBSD: malloc.c,v 1.185 2016/03/17 17:55:33 mmcc Exp $ */ /* - * Copyright (c) 2008, 2010, 2011 Otto Moerbeek <o...@drijf.net> + * Copyright (c) 2008, 2010, 2011, 2016 Otto Moerbeek 
<o...@drijf.net> * Copyright (c) 2012 Matthew Dempsky <matt...@openbsd.org> * Copyright (c) 2008 Damien Miller <d...@openbsd.org> * Copyright (c) 2000 Poul-Henning Kamp <p...@freebsd.org> @@ -43,6 +43,7 @@ #endif #include "thread_private.h" +#include <machine/tcb.h> #if defined(__sparc__) && !defined(__sparcv9__) #define MALLOC_PAGESHIFT (13U) @@ -95,10 +96,10 @@ #define _MALLOC_LEAVE(d) do { if (__isthreaded) { \ (d)->active--; \ - _MALLOC_UNLOCK(); } \ + _MALLOC_UNLOCK(d->mutex); } \ } while (0) #define _MALLOC_ENTER(d) do { if (__isthreaded) { \ - _MALLOC_LOCK(); \ + _MALLOC_LOCK(d->mutex); \ (d)->active++; } \ } while (0) @@ -129,6 +130,7 @@ struct dir_info { void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1]; size_t rbytesused; /* random bytes used */ char *func; /* current function */ + int mutex; u_char rbytes[32]; /* random bytes */ u_short chunk_start; #ifdef MALLOC_STATS @@ -178,7 +180,7 @@ struct chunk_info { }; struct malloc_readonly { - struct dir_info *malloc_pool; /* Main bookkeeping information */ + struct dir_info *malloc_pool[_MALLOC_MUTEXES]; /* Main bookkeeping information */ int malloc_freenow; /* Free quickly - disable chunk rnd */ int malloc_freeunmap; /* mprotect free pages PROT_NONE? */ int malloc_hint; /* call madvice on free pages? 
*/ @@ -202,14 +204,13 @@ static union { u_char _pad[MALLOC_PAGESIZE]; } malloc_readonly __attribute__((aligned(MALLOC_PAGESIZE))); #define mopts malloc_readonly.mopts -#define getpool() mopts.malloc_pool char *malloc_options; /* compile-time options */ static u_char getrbyte(struct dir_info *d); #ifdef MALLOC_STATS -void malloc_dump(int); +void malloc_dump(int, struct dir_info *); PROTO_NORMAL(malloc_dump); static void malloc_exit(void); #define CALLER __builtin_return_address(0) @@ -240,6 +241,18 @@ hash(void *p) return sum; } +static inline +struct dir_info *getpool(void) +{ + //return mopts.malloc_pool[0]; + if (!__isthreaded) + return mopts.malloc_pool[0]; + else + return mopts.malloc_pool[hash(TCB_GET()) & + (_MALLOC_MUTEXES - 1)]; +} + + static void wrterror(struct dir_info *d, char *msg, void *p) { @@ -247,7 +260,7 @@ wrterror(struct dir_info *d, char *msg, struct iovec iov[7]; char pidbuf[20]; char buf[20]; - int saved_errno = errno; + int saved_errno = errno, i; iov[0].iov_base = __progname; iov[0].iov_len = strlen(__progname); @@ -278,7 +291,8 @@ wrterror(struct dir_info *d, char *msg, #ifdef MALLOC_STATS if (mopts.malloc_stats) - malloc_dump(STDERR_FILENO); + for (i = 0; i < _MALLOC_MUTEXES; i++) + malloc_dump(STDERR_FILENO, mopts.malloc_pool[i]); #endif /* MALLOC_STATS */ errno = saved_errno; @@ -565,16 +579,11 @@ omalloc_parseopt(char opt) } } -/* - * Initialize a dir_info, which should have been cleared by caller - */ -static int -omalloc_init(struct dir_info **dp) +static void +omalloc_init(void) { char *p, *q, b[64]; int i, j; - size_t d_avail, regioninfo_size; - struct dir_info *d; /* * Default options @@ -637,6 +646,18 @@ omalloc_init(struct dir_info **dp) arc4random_buf(&mopts.malloc_chunk_canary, sizeof(mopts.malloc_chunk_canary)); +} + +/* + * Initialize a dir_info, which should have been cleared by caller + */ +static int +omalloc_poolinit(struct dir_info **dp) +{ + void *p; + size_t d_avail, regioninfo_size; + struct dir_info *d; + int i, j; 
/* * Allocate dir_info with a guard page on either side. Also @@ -644,7 +665,7 @@ omalloc_init(struct dir_info **dp) * lies (subject to alignment by 1 << MALLOC_MINSHIFT) */ if ((p = MMAP(DIR_INFO_RSZ + (MALLOC_PAGESIZE * 2))) == MAP_FAILED) - return -1; + return 1; mprotect(p, MALLOC_PAGESIZE, PROT_NONE); mprotect(p + MALLOC_PAGESIZE + DIR_INFO_RSZ, MALLOC_PAGESIZE, PROT_NONE); @@ -672,13 +693,6 @@ omalloc_init(struct dir_info **dp) *dp = d; - /* - * Options have been set and will never be reset. - * Prevent further tampering with them. - */ - if (((uintptr_t)&malloc_readonly & MALLOC_PAGEMASK) == 0) - mprotect(&malloc_readonly, sizeof(malloc_readonly), PROT_READ); - return 0; } @@ -1174,20 +1188,41 @@ malloc_recurse(struct dir_info *d) wrterror(d, "recursive call", NULL); } d->active--; - _MALLOC_UNLOCK(); + _MALLOC_UNLOCK(d->mutex); errno = EDEADLK; } static int malloc_init(void) { - if (omalloc_init(&mopts.malloc_pool)) { - _MALLOC_UNLOCK(); - if (mopts.malloc_xmalloc) - wrterror(NULL, "out of memory", NULL); - errno = ENOMEM; - return -1; + int i; + struct dir_info *d; + + _MALLOC_LOCK(0); + if (mopts.malloc_pool[0]) { + _MALLOC_UNLOCK(0); + return 0; + } + omalloc_init(); + for (i = 0; i < _MALLOC_MUTEXES; i++) { + if (omalloc_poolinit(&d)) { + _MALLOC_UNLOCK(0); + if (mopts.malloc_xmalloc) + wrterror(NULL, "out of memory", NULL); + errno = ENOMEM; + return -1; + } + d->mutex = i; + mopts.malloc_pool[i] = d; } + + /* + * Options have been set and will never be reset. + * Prevent further tampering with them. 
+ */ + if (((uintptr_t)&malloc_readonly & MALLOC_PAGEMASK) == 0) + mprotect(&malloc_readonly, sizeof(malloc_readonly), PROT_READ); + _MALLOC_UNLOCK(0); return 0; } @@ -1198,13 +1233,13 @@ malloc(size_t size) struct dir_info *d; int saved_errno = errno; - _MALLOC_LOCK(); d = getpool(); if (d == NULL) { if (malloc_init() != 0) return NULL; d = getpool(); } + _MALLOC_LOCK(d->mutex); d->func = "malloc():"; if (d->active++) { @@ -1215,7 +1250,7 @@ malloc(size_t size) size += mopts.malloc_canaries; r = omalloc(d, size, 0, CALLER); d->active--; - _MALLOC_UNLOCK(); + _MALLOC_UNLOCK(d->mutex); if (r == NULL && mopts.malloc_xmalloc) { wrterror(d, "out of memory", NULL); errno = ENOMEM; @@ -1252,23 +1287,41 @@ validate_junk(struct dir_info *pool, voi } static void -ofree(struct dir_info *pool, void *p) +ofree(struct dir_info *argpool, void *p) { + struct dir_info *pool; struct region_info *r; size_t sz; + int i; + pool = argpool; r = find(pool, p); if (r == NULL) { - wrterror(pool, "bogus pointer (double free?)", p); - return; + for (i = 0; i < _MALLOC_MUTEXES; i++) { + if (i == pool->mutex) + continue; + pool->active--; + _MALLOC_UNLOCK(pool->mutex); + pool = mopts.malloc_pool[i]; + _MALLOC_LOCK(pool->mutex); + pool->active++; + r = find(pool, p); + if (r != NULL) + break; + } + if (r == NULL) { + wrterror(pool, "bogus pointer (double free?)", p); + goto done; + } } + REALSIZE(sz, r); if (sz > MALLOC_MAXCHUNK) { if (sz - mopts.malloc_guard >= MALLOC_PAGESIZE - MALLOC_LEEWAY) { if (r->p != p) { wrterror(pool, "bogus pointer", p); - return; + goto done; } } else { #if notyetbecause_of_realloc @@ -1306,13 +1359,13 @@ ofree(struct dir_info *pool, void *p) memset(p, SOME_FREEJUNK, sz - mopts.malloc_canaries); if (!mopts.malloc_freenow) { if (find_chunknum(pool, r, p) == -1) - return; + goto done; i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK; tmp = p; p = pool->delayed_chunks[i]; if (tmp == p) { wrterror(pool, "double free", p); - return; + goto done; } if (mopts.malloc_junk) 
validate_junk(pool, p); @@ -1322,11 +1375,18 @@ ofree(struct dir_info *pool, void *p) r = find(pool, p); if (r == NULL) { wrterror(pool, "bogus pointer (double free?)", p); - return; + goto done; } free_bytes(pool, r, p); } } +done: + if (argpool != pool) { + pool->active--; + _MALLOC_UNLOCK(pool->mutex); + _MALLOC_LOCK(argpool->mutex); + argpool->active++; + } } void @@ -1339,13 +1399,12 @@ free(void *ptr) if (ptr == NULL) return; - _MALLOC_LOCK(); d = getpool(); if (d == NULL) { - _MALLOC_UNLOCK(); wrterror(d, "free() called before allocation", NULL); return; } + _MALLOC_LOCK(d->mutex); d->func = "free():"; if (d->active++) { malloc_recurse(d); @@ -1353,30 +1412,50 @@ free(void *ptr) } ofree(d, ptr); d->active--; - _MALLOC_UNLOCK(); + _MALLOC_UNLOCK(d->mutex); errno = saved_errno; } /*DEF_STRONG(free);*/ static void * -orealloc(struct dir_info *pool, void *p, size_t newsz, void *f) +orealloc(struct dir_info *argpool, void *p, size_t newsz, void *f) { + struct dir_info *pool; struct region_info *r; size_t oldsz, goldsz, gnewsz; - void *q; + void *q, *ret; + int i; + + pool = argpool; if (p == NULL) return omalloc(pool, newsz, 0, f); r = find(pool, p); if (r == NULL) { - wrterror(pool, "bogus pointer (double free?)", p); - return NULL; + for (i = 0; i < _MALLOC_MUTEXES; i++) { + if (i == pool->mutex) + continue; + pool->active--; + _MALLOC_UNLOCK(pool->mutex); + pool = mopts.malloc_pool[i]; + _MALLOC_LOCK(pool->mutex); + pool->active++; + r = find(pool, p); + if (r != NULL) + break; + } + if (r == NULL) { + wrterror(pool, "bogus pointer (double free?)", p); + ret = NULL; + goto done; + } } if (newsz >= SIZE_MAX - mopts.malloc_guard - MALLOC_PAGESIZE) { errno = ENOMEM; - return NULL; + ret = NULL; + goto done; } REALSIZE(oldsz, r); @@ -1419,7 +1498,8 @@ gotit: r->size = newsz; STATS_SETF(r, f); STATS_INC(pool->cheap_reallocs); - return p; + ret = p; + goto done; } else if (q != MAP_FAILED) { if (munmap(q, needed)) wrterror(pool, "munmap", q); @@ -1439,14 +1519,16 @@ 
gotit: unmap(pool, (char *)p + rnewsz, roldsz - rnewsz); r->size = gnewsz; STATS_SETF(r, f); - return p; + ret = p; + goto done; } else { if (newsz > oldsz && mopts.malloc_junk == 2) memset((char *)p + newsz, SOME_JUNK, rnewsz - mopts.malloc_guard - newsz); r->size = gnewsz; STATS_SETF(r, f); - return p; + ret = p; + goto done; } } if (newsz <= oldsz && newsz > oldsz / 2 && !mopts.malloc_realloc) { @@ -1458,11 +1540,13 @@ gotit: memset((char *)p + newsz, SOME_JUNK, usable_oldsz - newsz); } STATS_SETF(r, f); - return p; + ret = p; } else if (newsz != oldsz || mopts.malloc_realloc) { q = omalloc(pool, newsz, 0, f); - if (q == NULL) - return NULL; + if (q == NULL) { + ret = NULL; + goto done; + } if (newsz != 0 && oldsz != 0) { size_t copysz = oldsz < newsz ? oldsz : newsz; if (copysz <= MALLOC_MAXCHUNK) @@ -1470,11 +1554,19 @@ gotit: memcpy(q, p, copysz); } ofree(pool, p); - return q; + ret = q; } else { STATS_SETF(r, f); - return p; + ret = p; + } +done: + if (argpool != pool) { + pool->active--; + _MALLOC_UNLOCK(pool->mutex); + _MALLOC_LOCK(argpool->mutex); + argpool->active++; } + return ret; } void * @@ -1484,13 +1576,13 @@ realloc(void *ptr, size_t size) void *r; int saved_errno = errno; - _MALLOC_LOCK(); d = getpool(); if (d == NULL) { if (malloc_init() != 0) return NULL; d = getpool(); } + _MALLOC_LOCK(d->mutex); d->func = "realloc():"; if (d->active++) { malloc_recurse(d); @@ -1501,7 +1593,7 @@ realloc(void *ptr, size_t size) r = orealloc(d, ptr, size, CALLER); d->active--; - _MALLOC_UNLOCK(); + _MALLOC_UNLOCK(d->mutex); if (r == NULL && mopts.malloc_xmalloc) { wrterror(d, "out of memory", NULL); errno = ENOMEM; @@ -1526,17 +1618,17 @@ calloc(size_t nmemb, size_t size) void *r; int saved_errno = errno; - _MALLOC_LOCK(); d = getpool(); if (d == NULL) { if (malloc_init() != 0) return NULL; d = getpool(); } + _MALLOC_LOCK(d->mutex); d->func = "calloc():"; if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && nmemb > 0 && SIZE_MAX / nmemb < size) { - 
_MALLOC_UNLOCK(); + _MALLOC_UNLOCK(d->mutex); if (mopts.malloc_xmalloc) wrterror(d, "out of memory", NULL); errno = ENOMEM; @@ -1554,7 +1646,7 @@ calloc(size_t nmemb, size_t size) r = omalloc(d, size, 1, CALLER); d->active--; - _MALLOC_UNLOCK(); + _MALLOC_UNLOCK(d->mutex); if (r == NULL && mopts.malloc_xmalloc) { wrterror(d, "out of memory", NULL); errno = ENOMEM; @@ -1668,13 +1760,13 @@ posix_memalign(void **memptr, size_t ali if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) return EINVAL; - _MALLOC_LOCK(); d = getpool(); if (d == NULL) { if (malloc_init() != 0) goto err; d = getpool(); } + _MALLOC_LOCK(d->mutex); d->func = "posix_memalign():"; if (d->active++) { malloc_recurse(d); @@ -1684,7 +1776,7 @@ posix_memalign(void **memptr, size_t ali size += mopts.malloc_canaries; r = omemalign(d, alignment, size, 0, CALLER); d->active--; - _MALLOC_UNLOCK(); + _MALLOC_UNLOCK(d->mutex); if (r == NULL) { if (mopts.malloc_xmalloc) { wrterror(d, "out of memory", NULL); @@ -1923,9 +2015,8 @@ malloc_dump1(int fd, struct dir_info *d) } void -malloc_dump(int fd) +malloc_dump(int fd, struct dir_info *pool) { - struct dir_info *pool = getpool(); int i; void *p; struct region_info *r; @@ -1956,11 +2047,12 @@ static void malloc_exit(void) { static const char q[] = "malloc() warning: Couldn't dump stats\n"; - int save_errno = errno, fd; + int save_errno = errno, fd, i; fd = open("malloc.out", O_RDWR|O_APPEND); if (fd != -1) { - malloc_dump(fd); + for (i = 0; i < _MALLOC_MUTEXES; i++) + malloc_dump(fd, mopts.malloc_pool[i]); close(fd); } else write(STDERR_FILENO, q, sizeof(q) - 1); Index: libc/thread/unithread_malloc_lock.c =================================================================== RCS file: /cvs/src/lib/libc/thread/unithread_malloc_lock.c,v retrieving revision 1.9 diff -u -p -r1.9 unithread_malloc_lock.c --- libc/thread/unithread_malloc_lock.c 7 Apr 2015 01:27:07 -0000 1.9 +++ libc/thread/unithread_malloc_lock.c 28 Mar 2016 08:22:31 -0000 @@ -28,13 
+28,13 @@ WEAK_ALIAS(_thread_arc4_lock); WEAK_ALIAS(_thread_arc4_unlock); void -WEAK_NAME(_thread_malloc_lock)(void) +WEAK_NAME(_thread_malloc_lock)(int n) { return; } void -WEAK_NAME(_thread_malloc_unlock)(void) +WEAK_NAME(_thread_malloc_unlock)(int n) { return; } Index: librthread/rthread_fork.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread_fork.c,v retrieving revision 1.15 diff -u -p -r1.15 rthread_fork.c --- librthread/rthread_fork.c 27 Jan 2016 08:40:05 -0000 1.15 +++ librthread/rthread_fork.c 28 Mar 2016 08:22:31 -0000 @@ -55,6 +55,7 @@ _dofork(int is_vfork) pthread_t me; pid_t (*sys_fork)(void); pid_t newid; + int i; sys_fork = is_vfork ? &_thread_sys_vfork : &_thread_sys_fork; @@ -76,7 +77,8 @@ _dofork(int is_vfork) #endif _thread_atexit_lock(); - _thread_malloc_lock(); + for (i = 0; i < _MALLOC_MUTEXES; i++) + _thread_malloc_lock(i); _thread_arc4_lock(); newid = sys_fork(); @@ -85,7 +87,8 @@ _dofork(int is_vfork) if (newid == 0) _thread_malloc_reinit(); else - _thread_malloc_unlock(); + for (i = 0; i < _MALLOC_MUTEXES; i++) + _thread_malloc_unlock(i); _thread_atexit_unlock(); if (newid == 0) { Index: librthread/rthread_libc.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread_libc.c,v retrieving revision 1.13 diff -u -p -r1.13 rthread_libc.c --- librthread/rthread_libc.c 27 Jan 2016 08:40:05 -0000 1.13 +++ librthread/rthread_libc.c 28 Mar 2016 08:22:31 -0000 @@ -152,35 +152,50 @@ _thread_mutex_destroy(void **mutex) /* * the malloc lock */ -static struct pthread_mutex malloc_lock = { - _SPINLOCK_UNLOCKED, - TAILQ_HEAD_INITIALIZER(malloc_lock.lockers), - PTHREAD_MUTEX_DEFAULT, - NULL, - 0, - -1 +#define MALLOC_LOCK_INITIALZER(n) { \ + _SPINLOCK_UNLOCKED, \ + TAILQ_HEAD_INITIALIZER(malloc_lock[n].lockers), \ + PTHREAD_MUTEX_DEFAULT, \ + NULL, \ + 0, \ + -1 } \ + +static struct pthread_mutex malloc_lock[_MALLOC_MUTEXES] = { + 
MALLOC_LOCK_INITIALZER(0), + MALLOC_LOCK_INITIALZER(1), + MALLOC_LOCK_INITIALZER(2), + MALLOC_LOCK_INITIALZER(3) +}; +static pthread_mutex_t malloc_mutex[_MALLOC_MUTEXES] = { + &malloc_lock[0], + &malloc_lock[1], + &malloc_lock[2], + &malloc_lock[3] }; -static pthread_mutex_t malloc_mutex = &malloc_lock; void -_thread_malloc_lock(void) +_thread_malloc_lock(int i) { - pthread_mutex_lock(&malloc_mutex); + pthread_mutex_lock(&malloc_mutex[i]); } void -_thread_malloc_unlock(void) +_thread_malloc_unlock(int i) { - pthread_mutex_unlock(&malloc_mutex); + pthread_mutex_unlock(&malloc_mutex[i]); } void _thread_malloc_reinit(void) { - malloc_lock.lock = _SPINLOCK_UNLOCKED_ASSIGN; - TAILQ_INIT(&malloc_lock.lockers); - malloc_lock.owner = NULL; - malloc_lock.count = 0; + int i; + + for (i = 0; i < _MALLOC_MUTEXES; i++) { + malloc_lock[i].lock = _SPINLOCK_UNLOCKED_ASSIGN; + TAILQ_INIT(&malloc_lock[i].lockers); + malloc_lock[i].owner = NULL; + malloc_lock[i].count = 0; + } } /*