On Wed, Feb 19, 2025 at 07:12:23PM +0300, Vitalii Mordan wrote:
> TSAN reports a potential data race on the state field of
> ThreadPoolElement. This is fixed by using atomic access to the field.
The tsan output from the bug report:
WARNING: ThreadSanitizer: data race (pid=787043)
Write of size 4 at 0x7b1c00000660 by thread T5 (mutexes: write M0):
#0 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:108:20
(test-thread-pool-smc+0xa65a56)
#1 qemu_thread_start
/home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9
(test-thread-pool-smc+0xa49040)
Previous read of size 4 at 0x7b1c00000660 by main thread:
#0 thread_pool_completion_bh
/home/mordan/qemu/build/../util/thread-pool.c:183:19
(test-thread-pool-smc+0xa6549d)
#1 aio_bh_call /home/mordan/qemu/build/../util/async.c:171:5
(test-thread-pool-smc+0xa5e03e)
#2 aio_bh_poll /home/mordan/qemu/build/../util/async.c:218:13
(test-thread-pool-smc+0xa5e03e)
#3 aio_poll /home/mordan/qemu/build/../util/aio-posix.c:722:17
(test-thread-pool-smc+0xa4343a)
#4 test_submit_many
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:133:9
(test-thread-pool-smc+0x50e638)
#5 do_test_cancel
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5
(test-thread-pool-smc+0x50e638)
#6 test_cancel_async
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5
(test-thread-pool-smc+0x50e638)
#7 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3
(test-thread-pool-smc+0x50e638)
Location is heap block of size 104 at 0x7b1c00000620 allocated by main thread:
#0 malloc
out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:667:5
(test-thread-pool-smc+0x346131)
#1 g_malloc <null> (libglib-2.0.so.0+0x5e738) (BuildId:
e845b8fd2f396872c036976626389ffc4f50c9c5)
#2 thread_pool_submit_aio
/home/mordan/qemu/build/../util/thread-pool.c:251:11
(test-thread-pool-smc+0xa648bd)
#3 test_submit_many
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9
(test-thread-pool-smc+0x50e600)
#4 do_test_cancel
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5
(test-thread-pool-smc+0x50e600)
#5 test_cancel_async
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5
(test-thread-pool-smc+0x50e600)
#6 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3
(test-thread-pool-smc+0x50e600)
Mutex M0 (0x7b3c00000100) created at:
#0 pthread_mutex_init
out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1316:3
(test-thread-pool-smc+0x34914f)
#1 qemu_mutex_init
/home/mordan/qemu/build/../util/qemu-thread-posix.c:71:11
(test-thread-pool-smc+0xa47189)
#2 thread_pool_init_one /home/mordan/qemu/build/../util/thread-pool.c:334:5
(test-thread-pool-smc+0xa64f60)
#3 thread_pool_new /home/mordan/qemu/build/../util/thread-pool.c:348:5
(test-thread-pool-smc+0xa64f60)
#4 aio_get_thread_pool /home/mordan/qemu/build/../util/async.c:441:28
(test-thread-pool-smc+0xa5e6d4)
#5 thread_pool_submit_aio
/home/mordan/qemu/build/../util/thread-pool.c:246:24
(test-thread-pool-smc+0xa6488d)
#6 test_submit_many
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9
(test-thread-pool-smc+0x50e600)
#7 do_test_cancel
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5
(test-thread-pool-smc+0x50e600)
#8 test_cancel_async
/home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5
(test-thread-pool-smc+0x50e600)
#9 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3
(test-thread-pool-smc+0x50e600)
Thread T5 'worker' (tid=787049, running) created by thread T4 at:
#0 pthread_create
out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1022:3
(test-thread-pool-smc+0x34791d)
#1 qemu_thread_create
/home/mordan/qemu/build/../util/qemu-thread-posix.c:583:11
(test-thread-pool-smc+0xa48ed0)
#2 do_spawn_thread /home/mordan/qemu/build/../util/thread-pool.c:146:5
(test-thread-pool-smc+0xa658de)
#3 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:83:5
(test-thread-pool-smc+0xa658de)
#4 qemu_thread_start
/home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9
(test-thread-pool-smc+0xa49040)
SUMMARY: ThreadSanitizer: data race
/home/mordan/qemu/build/../util/thread-pool.c:108:20 in worker_thread
My interpretation is that tsan has detected a race between
the load in thread_pool_completion_bh():
static void thread_pool_completion_bh(void *opaque)
{
ThreadPool *pool = opaque;
ThreadPoolElement *elem, *next;
defer_call_begin(); /* cb() may use defer_call() to coalesce work */
restart:
QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
if (elem->state != THREAD_DONE) {
^^^^^^^^^^^
and the store in worker_thread():
req = QTAILQ_FIRST(&pool->request_list);
QTAILQ_REMOVE(&pool->request_list, req, reqs);
req->state = THREAD_ACTIVE;
^^^^^^^^^^^^^^^^^^^^^^^^^^^
qemu_mutex_unlock(&pool->lock);
It doesn't matter whether thread_pool_completion_bh() sees THREAD_QUEUED
or THREAD_ACTIVE: in either case the element is not THREAD_DONE, so the
loop just skips it and it will be picked up again after the worker
finishes and reschedules the completion BH. This looks like a false
positive with no practical effect either way.
THREAD_QUEUED vs THREAD_ACTIVE matters in thread_pool_cancel(), but that
is protected by pool->lock.
Paolo: Any thoughts?
Stefan
>
> Fixes: d354c7eccf ("aio: add generic thread-pool facility")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2822
> Signed-off-by: Vitalii Mordan <[email protected]>
> ---
> util/thread-pool.c | 13 ++++++-------
> 1 file changed, 6 insertions(+), 7 deletions(-)
>
> diff --git a/util/thread-pool.c b/util/thread-pool.c
> index 27eb777e85..6c5f4d085b 100644
> --- a/util/thread-pool.c
> +++ b/util/thread-pool.c
> @@ -111,9 +111,8 @@ static void *worker_thread(void *opaque)
> ret = req->func(req->arg);
>
> req->ret = ret;
> - /* Write ret before state. */
> - smp_wmb();
> - req->state = THREAD_DONE;
> + /* Atomically update state after setting ret. */
> + qatomic_store_release(&req->state, THREAD_DONE);
>
> qemu_bh_schedule(pool->completion_bh);
> qemu_mutex_lock(&pool->lock);
> @@ -180,7 +179,7 @@ static void thread_pool_completion_bh(void *opaque)
>
> restart:
> QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> - if (elem->state != THREAD_DONE) {
> + if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
> continue;
> }
>
> @@ -223,12 +222,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
> trace_thread_pool_cancel(elem, elem->common.opaque);
>
> QEMU_LOCK_GUARD(&pool->lock);
> - if (elem->state == THREAD_QUEUED) {
> + if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {
> QTAILQ_REMOVE(&pool->request_list, elem, reqs);
> qemu_bh_schedule(pool->completion_bh);
>
> - elem->state = THREAD_DONE;
> elem->ret = -ECANCELED;
> + qatomic_store_release(&elem->state, THREAD_DONE);
> }
>
> }
> @@ -251,8 +250,8 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func,
> void *arg,
> req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
> req->func = func;
> req->arg = arg;
> - req->state = THREAD_QUEUED;
> req->pool = pool;
> + qatomic_store_release(&req->state, THREAD_QUEUED);
>
> QLIST_INSERT_HEAD(&pool->head, req, all);
>
> --
> 2.34.1
>
signature.asc
Description: PGP signature
