https://github.com/python/cpython/commit/90fe3250f82712b61630d636246c92df7c40c816
commit: 90fe3250f82712b61630d636246c92df7c40c816
branch: main
author: Sam Gross <[email protected]>
committer: colesbury <[email protected]>
date: 2025-09-16T09:21:58+01:00
summary:
gh-137433: Fix deadlock with stop-the-world and daemon threads (gh-137735)
There was a deadlock originally seen by Memray when a daemon thread
enabled or disabled profiling while the interpreter was shutting down.
I think this could also happen with garbage collection, but I haven't
seen that in practice.
The daemon thread could be hung while trying acquire the global rwmutex
that prevents overlapping global and per-interpreter stop-the-world events.
Since it already held the main interpreter's stop-the-world lock, it
also deadlocked the main thread, which is trying to perform interpreter
finalization.
Swap the order of lock acquisition to prevent this deadlock.
Additionally, refactor `_PyParkingLot_Park` so that the global buckets
hashtable is left in a clean state if the thread is hung in
`PyEval_AcquireThread`.
files:
A
Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-13-39-02.gh-issue-137433.g6Atfz.rst
M Include/internal/pycore_semaphore.h
M Lib/test/test_threading.py
M Python/lock.c
M Python/parking_lot.c
M Python/pystate.c
diff --git a/Include/internal/pycore_semaphore.h
b/Include/internal/pycore_semaphore.h
index 269538384606ce..66b4939dcacf05 100644
--- a/Include/internal/pycore_semaphore.h
+++ b/Include/internal/pycore_semaphore.h
@@ -46,10 +46,8 @@ typedef struct _PySemaphore {
} _PySemaphore;
// Puts the current thread to sleep until _PySemaphore_Wakeup() is called.
-// If `detach` is true, then the thread will detach/release the GIL while
-// sleeping.
PyAPI_FUNC(int)
-_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout_ns, int detach);
+_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout_ns);
// Wakes up a single thread waiting on sema. Note that _PySemaphore_Wakeup()
// can be called before _PySemaphore_Wait().
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
index 6e612b06281ca2..774059c21d7246 100644
--- a/Lib/test/test_threading.py
+++ b/Lib/test/test_threading.py
@@ -1430,6 +1430,33 @@ def test_native_id_after_fork(self):
self.assertEqual(len(native_ids), 2)
self.assertNotEqual(native_ids[0], native_ids[1])
+ def test_stop_the_world_during_finalization(self):
+ # gh-137433: Test functions that trigger a stop-the-world in the free
+ # threading build concurrent with interpreter finalization.
+ script = """if True:
+ import gc
+ import sys
+ import threading
+ NUM_THREADS = 5
+ b = threading.Barrier(NUM_THREADS + 1)
+ def run_in_bg():
+ b.wait()
+ while True:
+ sys.setprofile(None)
+ gc.collect()
+
+ for _ in range(NUM_THREADS):
+ t = threading.Thread(target=run_in_bg, daemon=True)
+ t.start()
+
+ b.wait()
+ print("Exiting...")
+ """
+ rc, out, err = assert_python_ok('-c', script)
+ self.assertEqual(rc, 0)
+ self.assertEqual(err, b"")
+ self.assertEqual(out.strip(), b"Exiting...")
+
class ThreadJoinOnShutdown(BaseTestCase):
def _run_and_join(self, script):
diff --git
a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-13-39-02.gh-issue-137433.g6Atfz.rst
b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-13-39-02.gh-issue-137433.g6Atfz.rst
new file mode 100644
index 00000000000000..0389cb0c735128
--- /dev/null
+++
b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-13-39-02.gh-issue-137433.g6Atfz.rst
@@ -0,0 +1,3 @@
+Fix a potential deadlock in the :term:`free threading` build when daemon
+threads enable or disable profiling or tracing while the main thread is
+shutting down the interpreter.
diff --git a/Python/lock.c b/Python/lock.c
index a49d587a1686d9..98f3f89c201fef 100644
--- a/Python/lock.c
+++ b/Python/lock.c
@@ -227,7 +227,7 @@ _PyRawMutex_LockSlow(_PyRawMutex *m)
// Wait for us to be woken up. Note that we still have to lock the
// mutex ourselves: it is NOT handed off to us.
- _PySemaphore_Wait(&waiter.sema, -1, /*detach=*/0);
+ _PySemaphore_Wait(&waiter.sema, -1);
}
_PySemaphore_Destroy(&waiter.sema);
diff --git a/Python/parking_lot.c b/Python/parking_lot.c
index e896dea02712f2..99c1ad848be795 100644
--- a/Python/parking_lot.c
+++ b/Python/parking_lot.c
@@ -91,8 +91,8 @@ _PySemaphore_Destroy(_PySemaphore *sema)
#endif
}
-static int
-_PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout)
+int
+_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout)
{
int res;
#if defined(MS_WINDOWS)
@@ -225,27 +225,6 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t
timeout)
return res;
}
-int
-_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout, int detach)
-{
- PyThreadState *tstate = NULL;
- if (detach) {
- tstate = _PyThreadState_GET();
- if (tstate && _PyThreadState_IsAttached(tstate)) {
- // Only detach if we are attached
- PyEval_ReleaseThread(tstate);
- }
- else {
- tstate = NULL;
- }
- }
- int res = _PySemaphore_PlatformWait(sema, timeout);
- if (tstate) {
- PyEval_AcquireThread(tstate);
- }
- return res;
-}
-
void
_PySemaphore_Wakeup(_PySemaphore *sema)
{
@@ -342,7 +321,19 @@ _PyParkingLot_Park(const void *addr, const void *expected,
size_t size,
enqueue(bucket, addr, &wait);
_PyRawMutex_Unlock(&bucket->mutex);
- int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach);
+ PyThreadState *tstate = NULL;
+ if (detach) {
+ tstate = _PyThreadState_GET();
+ if (tstate && _PyThreadState_IsAttached(tstate)) {
+ // Only detach if we are attached
+ PyEval_ReleaseThread(tstate);
+ }
+ else {
+ tstate = NULL;
+ }
+ }
+
+ int res = _PySemaphore_Wait(&wait.sema, timeout_ns);
if (res == Py_PARK_OK) {
goto done;
}
@@ -354,7 +345,7 @@ _PyParkingLot_Park(const void *addr, const void *expected,
size_t size,
// Another thread has started to unpark us. Wait until we process the
// wakeup signal.
do {
- res = _PySemaphore_Wait(&wait.sema, -1, detach);
+ res = _PySemaphore_Wait(&wait.sema, -1);
} while (res != Py_PARK_OK);
goto done;
}
@@ -366,6 +357,9 @@ _PyParkingLot_Park(const void *addr, const void *expected,
size_t size,
done:
_PySemaphore_Destroy(&wait.sema);
+ if (tstate) {
+ PyEval_AcquireThread(tstate);
+ }
return res;
}
diff --git a/Python/pystate.c b/Python/pystate.c
index 2465d8667472dc..a39369efc07772 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -2253,13 +2253,15 @@ stop_the_world(struct _stoptheworld_state *stw)
{
_PyRuntimeState *runtime = &_PyRuntime;
- PyMutex_Lock(&stw->mutex);
+ // gh-137433: Acquire the rwmutex first to avoid deadlocks with daemon
+ // threads that may hang when blocked on lock acquisition.
if (stw->is_global) {
_PyRWMutex_Lock(&runtime->stoptheworld_mutex);
}
else {
_PyRWMutex_RLock(&runtime->stoptheworld_mutex);
}
+ PyMutex_Lock(&stw->mutex);
HEAD_LOCK(runtime);
stw->requested = 1;
@@ -2325,13 +2327,13 @@ start_the_world(struct _stoptheworld_state *stw)
}
stw->requester = NULL;
HEAD_UNLOCK(runtime);
+ PyMutex_Unlock(&stw->mutex);
if (stw->is_global) {
_PyRWMutex_Unlock(&runtime->stoptheworld_mutex);
}
else {
_PyRWMutex_RUnlock(&runtime->stoptheworld_mutex);
}
- PyMutex_Unlock(&stw->mutex);
}
#endif // Py_GIL_DISABLED
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]