wingo pushed a commit to branch wip-whippet in repository guile. commit 0939aff5f69b668bb44e8b987b812c6ab27061c4 Author: Andy Wingo <wi...@pobox.com> AuthorDate: Wed Jul 16 09:09:07 2025 +0200
Rework mutex/cond waiting to not allocate Fixes some deadlocks when trying to come to a safepoint. * libguile/threads-internal.h (struct scm_wait_queue): New type. Embed it in threads, mutexes, and conds. * libguile/threads.c (enqueue, remqueue_inner, remqueue, dequeue): Rework to use embedded wait queue links instead of allocating. (block_self, unblock_from_queue, guilify_self_1) (scm_make_mutex_with_kind, lock_mutex, unlock_mutex, scm_mutex_owner) (scm_mutex_locked_p, scm_make_condition_variable, timed_wait) (scm_signal_condition_variable, scm_broadcast_condition_variable): Adapt to wait queue change. Also, pin mutexes and conds. --- libguile/threads-internal.h | 37 +++++++- libguile/threads.c | 209 +++++++++++++++++++------------------------- 2 files changed, 122 insertions(+), 124 deletions(-) diff --git a/libguile/threads-internal.h b/libguile/threads-internal.h index 344e22f7b..84af674a9 100644 --- a/libguile/threads-internal.h +++ b/libguile/threads-internal.h @@ -43,9 +43,35 @@ struct scm_thread_wake_data scm_i_pthread_cond_t *cond; }; +/* A scm_wait_queue is a link in a circular linked-list. A + scm_wait_queue can either be "attached", in which case its next and + prev links are non-null, or "detached", in which case they are both + null. Condition variables and mutexes embed a wait queue as a list + head, and their queues are always attached. An empty queue is one + whose next and prev links point to itself. Threads embed a wait + queue as a link, which may be detached if the thread is not waiting + on anything, or attached to the wait queue of the cond or mutex that + it is waiting on. + + GC-wise, if a thread is waiting on a cond or mutex, the thread will + be reachable because it is on the global thread list. If a thread + owns a reachable mutex, it is reachable because of the mutex's owner + field. Any thread waiting for a mutex/cond has an active stack and + that mutex/cond is on the stack, so the mutex/cond will be reachable. 
+ + Because of the interior pointers in wait queues, we pin conds and + mutexes when they are allocated, even if there is no one waiting for + them. (scm_thread is always pinned as well, for other reasons.) */ +struct scm_wait_queue { + scm_thread *thread; + struct scm_wait_queue *next; + struct scm_wait_queue *prev; +}; + struct scm_thread { scm_t_bits tag; + /* Link for global set of threads. */ struct scm_thread *next_thread; /* VM state for this thread. */ @@ -71,6 +97,9 @@ struct scm_thread { this thread exits. */ int needs_unregister; + /* Queue links for when a thread is waiting on a mutex or a cond. */ + struct scm_wait_queue queue; + struct scm_thread_wake_data wake_data; scm_i_pthread_cond_t sleep_cond; int sleep_pipe[2]; @@ -161,10 +190,10 @@ enum scm_mutex_kind { struct scm_mutex { scm_t_bits tag_and_flags; - /* The thread that owns this mutex, or #f if the mutex is unlocked. */ - SCM owner; + /* The thread that owns this mutex, or NULL if the mutex is unlocked. */ + scm_thread *owner; /* Queue of threads waiting for this mutex. */ - SCM waiting; + struct scm_wait_queue waiting; /* For SCM_MUTEX_RECURSIVE (and only SCM_MUTEX_RECURSIVE), the recursive lock count. The first lock does not count. */ int level; @@ -194,7 +223,7 @@ scm_mutex_kind (struct scm_mutex *m) struct scm_cond { scm_t_bits tag; - SCM waiting; /* the threads waiting for this condition. */ + struct scm_wait_queue waiting; /* the threads waiting for this condition. */ /* FIXME: Using one cond with multiple mutexes may race on the waiting list. */ }; diff --git a/libguile/threads.c b/libguile/threads.c index 1bb6e4559..7aefa09bc 100644 --- a/libguile/threads.c +++ b/libguile/threads.c @@ -136,94 +136,67 @@ to_timespec (SCM t, scm_t_timespec *waittime) /*** Queues */ -/* Note: We annotate with "GC-robust" assignments whose purpose is to avoid - the risk of false references leading to unbounded retained space as - described in "Bounding Space Usage of Conservative Garbage Collectors", - H.J. 
Boehm, 2001. */ - -/* Make an empty queue data structure. - */ -static SCM -make_queue () -{ - return scm_cons (SCM_EOL, SCM_EOL); -} - static scm_i_pthread_mutex_t queue_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER; -/* Put T at the back of Q and return a handle that can be used with - remqueue to remove T from Q again. - */ -static SCM -enqueue (SCM q, SCM t) +static void +enqueue (struct scm_wait_queue *q, scm_thread *t) { - SCM c = scm_cons (t, SCM_EOL); + struct scm_wait_queue *p = &t->queue; + if (p->next) + abort (); + scm_i_pthread_mutex_lock (&queue_lock); - if (scm_is_null (SCM_CDR (q))) - SCM_SETCDR (q, c); - else - SCM_SETCDR (SCM_CAR (q), c); - SCM_SETCAR (q, c); + p->next = q->next; + p->prev = q; + q->next->prev = p; + q->next = p; scm_i_pthread_mutex_unlock (&queue_lock); - return c; } -/* Remove the element that the handle C refers to from the queue Q. C - must have been returned from a call to enqueue. The return value - is zero when the element referred to by C has already been removed. - Otherwise, 1 is returned. -*/ static int -remqueue (SCM q, SCM c) +remqueue_inner (struct scm_wait_queue *p) { - SCM p, prev = q; - scm_i_pthread_mutex_lock (&queue_lock); - for (p = SCM_CDR (q); !scm_is_null (p); p = SCM_CDR (p)) - { - if (scm_is_eq (p, c)) - { - if (scm_is_eq (c, SCM_CAR (q))) - SCM_SETCAR (q, scm_is_eq (prev, q) ? SCM_EOL : prev); - SCM_SETCDR (prev, SCM_CDR (c)); + if (!p->next) + return 0; - /* GC-robust */ - SCM_SETCDR (c, SCM_EOL); + if (p->next == p) + abort (); - scm_i_pthread_mutex_unlock (&queue_lock); - return 1; - } - prev = p; - } - scm_i_pthread_mutex_unlock (&queue_lock); - return 0; + p->next->prev = p->prev; + p->prev->next = p->next; + + p->next = p->prev = NULL; + + return 1; } -/* Remove the front-most element from the queue Q and return it. - Return SCM_BOOL_F when Q is empty. 
-*/ -static SCM -dequeue (SCM q) +static int +remqueue (scm_thread *t) { - SCM c; scm_i_pthread_mutex_lock (&queue_lock); - c = SCM_CDR (q); - if (scm_is_null (c)) + int ret = remqueue_inner (&t->queue); + scm_i_pthread_mutex_unlock (&queue_lock); + + return ret; +} + +static scm_thread * +dequeue (struct scm_wait_queue *q) +{ + scm_i_pthread_mutex_lock (&queue_lock); + struct scm_wait_queue *last = q->prev; + + if (last == q) { scm_i_pthread_mutex_unlock (&queue_lock); - return SCM_BOOL_F; + return NULL; } - else - { - SCM_SETCDR (q, SCM_CDR (c)); - if (scm_is_null (SCM_CDR (q))) - SCM_SETCAR (q, SCM_EOL); - scm_i_pthread_mutex_unlock (&queue_lock); - /* GC-robust */ - SCM_SETCDR (c, SCM_EOL); + if (last->thread == NULL || !remqueue_inner (last)) + abort (); + scm_i_pthread_mutex_unlock (&queue_lock); - return SCM_CAR (c); - } + return last->thread; } int @@ -287,18 +260,17 @@ scm_i_print_thread (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED) The system asyncs themselves are not executed by block_self. */ static int -block_self (SCM queue, scm_i_pthread_mutex_t *mutex, +block_self (struct scm_wait_queue *queue, scm_i_pthread_mutex_t *mutex, const scm_t_timespec *waittime) { scm_thread *t = SCM_I_CURRENT_THREAD; - SCM q_handle; int err; if (scm_i_prepare_to_wait_on_cond (t, mutex, &t->sleep_cond)) return EINTR; t->block_asyncs++; - q_handle = enqueue (queue, scm_thread_handle (t)); + enqueue (queue, t); gc_deactivate (t->mutator); if (waittime == NULL) err = scm_i_scm_pthread_cond_wait (&t->sleep_cond, mutex); @@ -310,7 +282,7 @@ block_self (SCM queue, scm_i_pthread_mutex_t *mutex, report this only when no other error (such as a timeout) has happened above. */ - if (remqueue (queue, q_handle) && err == 0) + if (remqueue (t) && err == 0) err = EINTR; t->block_asyncs--; scm_i_wait_finished (t); @@ -319,14 +291,14 @@ block_self (SCM queue, scm_i_pthread_mutex_t *mutex, } /* Wake up the first thread on QUEUE, if any.
The awoken thread is - returned, or #f if the queue was empty. + returned, or NULL if the queue was empty. */ -static SCM -unblock_from_queue (SCM queue) +static scm_thread * +unblock_from_queue (struct scm_wait_queue *queue) { - SCM thread = dequeue (queue); - if (scm_is_true (thread)) - scm_i_pthread_cond_signal (&SCM_I_THREAD_DATA(thread)->sleep_cond); + scm_thread *thread = dequeue (queue); + if (thread) + scm_i_pthread_cond_signal (&thread->sleep_cond); return thread; } @@ -380,6 +352,8 @@ guilify_self_1 (struct gc_mutator *mut, struct gc_stack_addr base, t->pending_asyncs = SCM_EOL; t->block_asyncs = 1; t->mutator = mut; + t->queue.thread = t; + t->queue.next = t->queue.prev = NULL; t->base = (SCM_STACKITEM *) gc_stack_addr_as_pointer (base); t->continuation_root = SCM_EOL; t->continuation_base = t->base; @@ -939,15 +913,18 @@ SCM_DEFINE (scm_make_mutex_with_kind, "make-mutex", 0, 1, 0, SCM_MISC_ERROR ("unsupported mutex kind: ~a", scm_list_1 (kind)); } - m = scm_allocate_tagged (SCM_I_CURRENT_THREAD, sizeof (struct scm_mutex)); + scm_thread *thr = SCM_I_CURRENT_THREAD; + m = scm_allocate_tagged (thr, sizeof (struct scm_mutex)); m->tag_and_flags = scm_tc16_mutex | (mkind << 16); - m->owner = SCM_BOOL_F; - m->waiting = make_queue (); + m->owner = NULL; + m->waiting.prev = m->waiting.next = &m->waiting; m->level = 0; /* Because PTHREAD_MUTEX_INITIALIZER is static, it's plain old data, and so we can just copy it. 
*/ memcpy (&m->lock, &lock, sizeof (m->lock)); + scm_gc_pin_object (thr, scm_from_mutex (m)); + return scm_from_mutex (m); } #undef FUNC_NAME @@ -980,21 +957,19 @@ lock_mutex (enum scm_mutex_kind kind, struct scm_mutex *m, { scm_i_scm_pthread_mutex_lock (&m->lock); - if (scm_is_eq (m->owner, SCM_BOOL_F)) + if (!m->owner) { - m->owner = scm_thread_handle (current_thread); + m->owner = current_thread; scm_i_pthread_mutex_unlock (&m->lock); return SCM_BOOL_T; } - else if (kind == SCM_MUTEX_RECURSIVE && - scm_is_eq (m->owner, scm_thread_handle (current_thread))) + else if (kind == SCM_MUTEX_RECURSIVE && m->owner == current_thread) { m->level++; scm_i_pthread_mutex_unlock (&m->lock); return SCM_BOOL_T; } - else if (kind == SCM_MUTEX_STANDARD && - scm_is_eq (m->owner, scm_thread_handle (current_thread))) + else if (kind == SCM_MUTEX_STANDARD && m->owner == current_thread) { scm_i_pthread_mutex_unlock (&m->lock); SCM_MISC_ERROR ("mutex already locked by thread", SCM_EOL); @@ -1002,7 +977,7 @@ lock_mutex (enum scm_mutex_kind kind, struct scm_mutex *m, else while (1) { - int err = block_self (m->waiting, &m->lock, waittime); + int err = block_self (&m->waiting, &m->lock, waittime); if (err == 0) { @@ -1029,9 +1004,9 @@ lock_mutex (enum scm_mutex_kind kind, struct scm_mutex *m, } maybe_acquire: - if (scm_is_eq (m->owner, SCM_BOOL_F)) + if (!m->owner) { - m->owner = scm_thread_handle (current_thread); + m->owner = current_thread; scm_i_pthread_mutex_unlock (&m->lock); return SCM_BOOL_T; } @@ -1116,9 +1091,9 @@ unlock_mutex (enum scm_mutex_kind kind, struct scm_mutex *m, { scm_i_scm_pthread_mutex_lock (&m->lock); - if (!scm_is_eq (m->owner, scm_thread_handle (current_thread))) + if (m->owner != current_thread) { - if (scm_is_eq (m->owner, SCM_BOOL_F)) + if (!m->owner) { scm_i_pthread_mutex_unlock (&m->lock); SCM_MISC_ERROR ("mutex not locked", SCM_EOL); @@ -1135,9 +1110,9 @@ unlock_mutex (enum scm_mutex_kind kind, struct scm_mutex *m, m->level--; else { - m->owner = 
SCM_BOOL_F; + m->owner = NULL; /* Wake up one waiter. */ - unblock_from_queue (m->waiting); + unblock_from_queue (&m->waiting); } scm_i_pthread_mutex_unlock (&m->lock); @@ -1193,16 +1168,13 @@ SCM_DEFINE (scm_mutex_owner, "mutex-owner", 1, 0, 0, "Return the thread owning @var{mx}, or @code{#f}.") #define FUNC_NAME s_scm_mutex_owner { - SCM owner; - struct scm_mutex *m = NULL; - SCM_VALIDATE_MUTEX (1, mx); - m = scm_to_mutex (mx); + struct scm_mutex *m = scm_to_mutex (mx); scm_i_pthread_mutex_lock (&m->lock); - owner = m->owner; + scm_thread *owner = m->owner; scm_i_pthread_mutex_unlock (&m->lock); - return owner; + return owner ? scm_thread_handle (owner) : SCM_BOOL_F; } #undef FUNC_NAME @@ -1215,7 +1187,7 @@ SCM_DEFINE (scm_mutex_level, "mutex-level", 1, 0, 0, struct scm_mutex *m = scm_to_mutex (mx); if (scm_mutex_kind (m) == SCM_MUTEX_RECURSIVE) return scm_from_int (m->level + 1); - else if (scm_is_eq (m->owner, SCM_BOOL_F)) + else if (!m->owner) return SCM_INUM0; else return SCM_INUM1; @@ -1228,10 +1200,7 @@ SCM_DEFINE (scm_mutex_locked_p, "mutex-locked?", 1, 0, 0, #define FUNC_NAME s_scm_mutex_locked_p { SCM_VALIDATE_MUTEX (1, mx); - if (scm_is_eq (scm_to_mutex (mx)->owner, SCM_BOOL_F)) - return SCM_BOOL_F; - else - return SCM_BOOL_T; + return scm_from_bool (scm_to_mutex (mx)->owner != NULL); } #undef FUNC_NAME @@ -1254,10 +1223,11 @@ SCM_DEFINE (scm_make_condition_variable, "make-condition-variable", 0, 0, 0, "Make a new condition variable.") #define FUNC_NAME s_scm_make_condition_variable { - struct scm_cond *c = - scm_allocate_tagged (SCM_I_CURRENT_THREAD, sizeof (struct scm_cond)); + scm_thread *thr = SCM_I_CURRENT_THREAD; + struct scm_cond *c = scm_allocate_tagged (thr, sizeof (struct scm_cond)); c->tag = scm_tc16_condition_variable; - c->waiting = make_queue (); + c->waiting.prev = c->waiting.next = &c->waiting; + scm_gc_pin_object (thr, scm_from_condvar (c)); return scm_from_condvar (c); } #undef FUNC_NAME @@ -1269,9 +1239,9 @@ timed_wait (enum 
scm_mutex_kind kind, struct scm_mutex *m, struct scm_cond *c, { scm_i_scm_pthread_mutex_lock (&m->lock); - if (!scm_is_eq (m->owner, scm_thread_handle (current_thread))) + if (m->owner != current_thread) { - if (scm_is_eq (m->owner, SCM_BOOL_F)) + if (!m->owner) { scm_i_pthread_mutex_unlock (&m->lock); SCM_MISC_ERROR ("mutex not locked", SCM_EOL); @@ -1293,14 +1263,14 @@ timed_wait (enum scm_mutex_kind kind, struct scm_mutex *m, struct scm_cond *c, m->level--; else { - m->owner = SCM_BOOL_F; + m->owner = NULL; /* Wake up one waiter. */ - unblock_from_queue (m->waiting); + unblock_from_queue (&m->waiting); } /* Wait for someone to signal the cond, a timeout, or an interrupt. */ - err = block_self (c->waiting, &m->lock, waittime); + err = block_self (&c->waiting, &m->lock, waittime); /* We woke up for some reason. Reacquire the mutex before doing anything else. @@ -1320,8 +1290,7 @@ timed_wait (enum scm_mutex_kind kind, struct scm_mutex *m, struct scm_cond *c, not great. Maybe it's necessary but for now we just disable interrupts while reaquiring a mutex after a wait. 
*/ current_thread->block_asyncs++; - if (kind == SCM_MUTEX_RECURSIVE && - scm_is_eq (m->owner, scm_thread_handle (current_thread))) + if (kind == SCM_MUTEX_RECURSIVE && m->owner == current_thread) { m->level++; scm_i_pthread_mutex_unlock (&m->lock); @@ -1329,13 +1298,13 @@ timed_wait (enum scm_mutex_kind kind, struct scm_mutex *m, struct scm_cond *c, else while (1) { - if (scm_is_eq (m->owner, SCM_BOOL_F)) + if (!m->owner) { - m->owner = scm_thread_handle (current_thread); + m->owner = current_thread; scm_i_pthread_mutex_unlock (&m->lock); break; } - block_self (m->waiting, &m->lock, waittime); + block_self (&m->waiting, &m->lock, waittime); } current_thread->block_asyncs--; @@ -1418,7 +1387,7 @@ SCM_DEFINE (scm_signal_condition_variable, "signal-condition-variable", 1, 0, 0, struct scm_cond *c; SCM_VALIDATE_CONDVAR (1, cv); c = scm_to_condvar (cv); - unblock_from_queue (c->waiting); + unblock_from_queue (&c->waiting); return SCM_BOOL_T; } #undef FUNC_NAME @@ -1431,7 +1400,7 @@ SCM_DEFINE (scm_broadcast_condition_variable, "broadcast-condition-variable", 1, struct scm_cond *c; SCM_VALIDATE_CONDVAR (1, cv); c = scm_to_condvar (cv); - while (scm_is_true (unblock_from_queue (c->waiting))) + while (unblock_from_queue (&c->waiting)) ; return SCM_BOOL_T; }