A SIGSEGV has been observed in the runner test in our GitLab CI with
musl libc. It can happen that a thread terminates before pthread_cancel()
is called, in which case pthread_cancel() may access the pthread_t of an
already cleaned-up thread. Avoid this by making the thread wait until it
has actually been cancelled.

Signed-off-by: Martin Wilck <[email protected]>
---
 libmpathutil/runner.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/libmpathutil/runner.c b/libmpathutil/runner.c
index 56abd03..459af13 100644
--- a/libmpathutil/runner.c
+++ b/libmpathutil/runner.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 // Copyright (c) 2026 SUSE LLC
 #include <assert.h>
+#include <sched.h>
 #include <time.h>
 #include <pthread.h>
 #include <urcu/uatomic.h>
@@ -47,9 +48,24 @@ static void cleanup_context(struct runner_context **prctx)
                return;
 
        st = uatomic_cmpxchg(&rctx->status, RUNNER_RUNNING, RUNNER_DONE);
+       /*
+        * If it finds the thread in RUNNER_RUNNING state, cancel_runner() sets
+        * the state to RUNNER_CANCELLED before actually cancelling it.
+        * If the thread terminates between these two points in time,
+        * pthread_cancel() may access a pthread_t for an already cleaned-up
+        * thread. Therefore wait here until the thread has actually been
+        * cancelled, after which cancel_runner() will set the state to
+        * RUNNER_DEAD. Whether the thread will actually see this value is
+        * implementation-dependent.
+        */
+       if (st == RUNNER_CANCELLED) {
+               do
+                       sched_yield();
+               while (uatomic_read(&rctx->status) == RUNNER_CANCELLED);
+       }
        if (st != RUNNER_RUNNING) {
                uatomic_cmpxchg(&rctx->status, st, RUNNER_DEAD);
-               condlog(st == RUNNER_CANCELLED ? 3 : 2,
+               condlog(st == RUNNER_DEAD || st == RUNNER_CANCELLED ? 3 : 2,
                        "%s: runner %p finished in state '%s'", __func__, rctx,
                        runner_state_name(st));
        }
@@ -116,6 +132,8 @@ repeat:
                break;
        case RUNNER_RUNNING:
                pthread_cancel(rctx->thr);
+               assert(uatomic_cmpxchg(&rctx->status, RUNNER_CANCELLED,
+                                      RUNNER_DEAD) == RUNNER_CANCELLED);
                st_new = RUNNER_CANCELLED;
                /* fallthrough */
        case RUNNER_CANCELLED:
-- 
2.54.0


Reply via email to