On Fri, Mar 06, 2026 at 05:06:14PM -0800, Brian Bunker wrote: > Add a test that reproduces the race condition in the TUR checker where > the checker thread has finished updating ct->state and ct->msgid (under > lock) but hasn't yet cleared ct->running. In this race window, the > code incorrectly returns PATH_PENDING even though a valid result is > already available. > > NOTE: This test currently fails, demonstrating the bug exists. > > The test includes two cases: > - test_race_window_result_ready: Simulates the race window where > ct->running is still set but ct->msgid indicates the check has > completed. This currently returns PATH_PENDING instead of the > actual result (PATH_UP). > - test_thread_still_running: Verifies that when ct->msgid is > MSG_TUR_RUNNING (thread genuinely hasn't finished), PATH_PENDING > is correctly returned. >
I thought your test program could show the TUR checker hanging. This is just showing the known behavior, and I don't see a way for it to hang. Also, unless you actually got the TUR checker to hang, I don't know of anyone who has ever reported a hang. -Ben > == running tur_race-test == > [==========] Running 2 test(s). > [ RUN ] test_race_window_result_ready > [ ERROR ] --- 0x6 != 0x3 > [ LINE ] --- tur_race.c:104: error: Failure! > [ FAILED ] test_race_window_result_ready > [ RUN ] test_thread_still_running > [ OK ] test_thread_still_running > [==========] 2 test(s) run. > [ PASSED ] 1 test(s). > [ FAILED ] 1 test(s), listed below: > [ FAILED ] test_race_window_result_ready > > 1 FAILED TEST(S) > > Signed-off-by: Brian Bunker <[email protected]> > --- > tests/Makefile | 3 +- > tests/tur_race.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 177 insertions(+), 1 deletion(-) > create mode 100644 tests/tur_race.c > > diff --git a/tests/Makefile b/tests/Makefile > index 9f1b950f..3d033d9a 100644 > --- a/tests/Makefile > +++ b/tests/Makefile > @@ -9,7 +9,7 @@ CFLAGS += $(BIN_CFLAGS) -Wno-unused-parameter > -Wno-unused-function $(W_MISSING_I > LIBDEPS += -L. -L $(mpathutildir) -L$(mpathcmddir) -lmultipath -lmpathutil > -lmpathcmd -lcmocka > > TESTS := uevent parser util dmevents hwtable blacklist unaligned vpd > pgpolicy \ > - alias directio valid devt mpathvalid strbuf sysfs features cli mapinfo > + alias directio valid devt mpathvalid strbuf sysfs features cli mapinfo > tur_race > HELPERS := test-lib.o test-log.o > > .PRECIOUS: $(TESTS:%=%-test) > @@ -65,6 +65,7 @@ features-test_LIBDEPS := -ludev -lpthread > features-test_OBJDEPS := $(mpathutildir)/mt-libudev.o > cli-test_OBJDEPS := $(daemondir)/cli.o > mapinfo-test_LIBDEPS = -lpthread -ldevmapper > +tur_race-test_LIBDEPS := -lpthread -lurcu > > %.o: %.c > @echo building $@ because of $? > diff --git a/tests/tur_race.c b/tests/tur_race.c > new file mode 100644 > index 00000000..00ad3cea > --- /dev/null > +++ b/tests/tur_race.c > @@ -0,0 +1,175 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Test for TUR checker race condition. > + * > + * This test demonstrates a race condition where the checker thread has > + * finished (set state/msgid under lock) but hasn't yet cleared ct->running. > + * Currently, libcheck_check() incorrectly returns PATH_PENDING even though > + * a valid result is available. > + * > + * NOTE: test_race_window_result_ready currently FAILS, demonstrating the > bug. > + */ > + > +#define _GNU_SOURCE > +#include <stdint.h> > +#include <stdbool.h> > +#include <stdarg.h> > +#include <stddef.h> > +#include <setjmp.h> > +#include <stdlib.h> > +#include <pthread.h> > +#include <time.h> > +#include "cmocka-compat.h" > + > +#include "globals.c" > +#include "../libmultipath/checkers/tur.c" > + > +/* Mock device for testing */ > +static dev_t test_devt; > + > +static int setup(void **state) > +{ > + test_devt = makedev(8, 0); > + return 0; > +} > + > +static int teardown(void **state) > +{ > + return 0; > +} > + > +/* > + * Test: Simulate the race window where thread has finished but running != 0 > + * > + * This simulates the state between: > + * pthread_mutex_unlock(&ct->lock); // thread released lock > + * uatomic_set(&ct->running, 0); // thread hasn't cleared running yet > + * > + * In this window: > + * - ct->state and ct->msgid have the final result > + * - ct->running is still 1 > + * > + * BUG: The code sees running != 0 and returns PATH_PENDING, ignoring > + * the valid result that is already available. > + * > + * This test currently FAILS, demonstrating the race condition. > + */ > +static void test_race_window_result_ready(void **state) > +{ > + struct checker c = {0}; > + struct tur_checker_context *ct; > + int result; > + > + /* Initialize the checker context manually to avoid dependencies */ > + ct = calloc(1, sizeof(struct tur_checker_context)); > + assert_non_null(ct); > + > + pthread_mutex_init(&ct->lock, NULL); > + pthread_cond_init(&ct->active, NULL); > + uatomic_set(&ct->holders, 1); > + ct->state = PATH_UNCHECKED; > + ct->fd = -1; > + > + c.fd = 1; > + c.timeout = 30; > + c.context = ct; > + assert_non_null(ct); > + > + /* > + * Simulate race condition state: > + * - Thread has finished and set state/msgid > + * - Thread has NOT yet cleared running > + * - ct->thread is set (appears to be an active check) > + */ > + ct->thread = (pthread_t)1; /* Non-zero: appears to be running */ > + ct->devt = test_devt; > + > + pthread_mutex_lock(&ct->lock); > + ct->state = PATH_UP; > + ct->msgid = CHECKER_MSGID_UP; > + pthread_mutex_unlock(&ct->lock); > + > + uatomic_set(&ct->running, 1); /* Thread "still running" */ > + ct->time = time(NULL) + 100; /* Not timed out */ > + > + /* Call libcheck_check - this is where the race manifests */ > + result = libcheck_check(&c); > + > + /* > + * Expected: PATH_UP (the result is ready) > + * Actual (BUG): PATH_PENDING (ignores ready result) > + * > + * This assertion currently FAILS, demonstrating the bug. > + */ > + assert_int_equal(result, PATH_UP); > + assert_int_equal(c.msgid, CHECKER_MSGID_UP); > + > + /* ct->thread was cleared by libcheck_check when it collected result */ > + ct->thread = 0; > + uatomic_set(&ct->running, 0); > + pthread_mutex_destroy(&ct->lock); > + pthread_cond_destroy(&ct->active); > + free(ct); > +} > + > +/* > + * Test: Verify thread still running returns PATH_PENDING > + * > + * When the thread genuinely hasn't finished (msgid == MSG_TUR_RUNNING), > + * we should correctly return PATH_PENDING. > + */ > +static void test_thread_still_running(void **state) > +{ > + struct checker c = {0}; > + struct tur_checker_context *ct; > + int result; > + > + /* Initialize the checker context manually to avoid dependencies */ > + ct = calloc(1, sizeof(struct tur_checker_context)); > + assert_non_null(ct); > + > + pthread_mutex_init(&ct->lock, NULL); > + pthread_cond_init(&ct->active, NULL); > + uatomic_set(&ct->holders, 1); > + ct->state = PATH_UNCHECKED; > + ct->fd = -1; > + > + c.fd = 1; > + c.timeout = 30; > + c.context = ct; > + > + /* Thread is genuinely still running - no result yet */ > + ct->thread = (pthread_t)1; > + ct->devt = test_devt; > + > + pthread_mutex_lock(&ct->lock); > + ct->state = PATH_PENDING; > + ct->msgid = MSG_TUR_RUNNING; /* Thread hasn't set result yet */ > + pthread_mutex_unlock(&ct->lock); > + > + uatomic_set(&ct->running, 1); > + ct->time = time(NULL) + 100; > + > + result = libcheck_check(&c); > + > + /* Should correctly return PATH_PENDING */ > + assert_int_equal(result, PATH_PENDING); > + > + ct->thread = 0; > + uatomic_set(&ct->running, 0); > + pthread_mutex_destroy(&ct->lock); > + pthread_cond_destroy(&ct->active); > + free(ct); > +} > + > +int main(void) > +{ > + const struct CMUnitTest tests[] = { > + cmocka_unit_test_setup_teardown(test_race_window_result_ready, > + setup, teardown), > + cmocka_unit_test_setup_teardown(test_thread_still_running, > + setup, teardown), > + }; > + return cmocka_run_group_tests(tests, NULL, NULL); > +} > + > -- > 2.52.0
