Add a test that validates the ENQUEUE_REPLENISH flag is correctly set during PI boosting after a task is changed via sched_setscheduler().
The test reproduces a specific scenario where: 1. Task B (DEADLINE, short deadline) holds a PI mutex 2. Task A (DEADLINE, long deadline) blocks on Task B's mutex 3. Task B doesn't inherit from Task A (B has higher priority) 4. Task B is changed from SCHED_DEADLINE to SCHED_IDLE via setscheduler 5. Task B should now inherit DEADLINE from Task A with ENQUEUE_REPLENISH Without the fix, the ENQUEUE_REPLENISH flag is missing when Task B inherits the DEADLINE attributes from Task A, causing bandwidth accounting corruption and potential system hangs. The test uses pthreads with PI mutexes to orchestrate the scenario and includes a timeout mechanism to detect if the bug causes a hang. Assisted-by: Claude Code: claude-sonnet-4-5@20250929 Signed-off-by: Juri Lelli <[email protected]> --- tools/testing/selftests/sched/deadline/Makefile | 5 +- .../selftests/sched/deadline/replenish_bug.c | 337 +++++++++++++++++++++ 2 files changed, 341 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/sched/deadline/Makefile b/tools/testing/selftests/sched/deadline/Makefile index e7e16c610ee58..aa7752da1bdcf 100644 --- a/tools/testing/selftests/sched/deadline/Makefile +++ b/tools/testing/selftests/sched/deadline/Makefile @@ -14,7 +14,7 @@ OUTPUT_DIR := $(OUTPUT) UTIL_OBJS := $(OUTPUT)/dl_util.o # Test object files (all .c files except runner.c, dl_util.c, cpuhog.c) -TEST_OBJS := $(OUTPUT)/basic.o $(OUTPUT)/bandwidth.o $(OUTPUT)/fair_server.o +TEST_OBJS := $(OUTPUT)/basic.o $(OUTPUT)/bandwidth.o $(OUTPUT)/fair_server.o $(OUTPUT)/replenish_bug.o # Runner binary links utility and test objects $(OUTPUT)/runner: runner.c $(UTIL_OBJS) $(TEST_OBJS) dl_test.h | $(OUTPUT_DIR) @@ -38,6 +38,9 @@ $(OUTPUT)/bandwidth.o: bandwidth.c dl_test.h dl_util.h | $(OUTPUT_DIR) $(OUTPUT)/fair_server.o: fair_server.c dl_test.h dl_util.h | $(OUTPUT_DIR) $(CC) $(CFLAGS) -c $< -o $@ +$(OUTPUT)/replenish_bug.o: replenish_bug.c dl_test.h dl_util.h | $(OUTPUT_DIR) + $(CC) $(CFLAGS) -c $< -o $@ + 
$(OUTPUT_DIR): mkdir -p $@ diff --git a/tools/testing/selftests/sched/deadline/replenish_bug.c b/tools/testing/selftests/sched/deadline/replenish_bug.c new file mode 100644 index 0000000000000..016007d04c95a --- /dev/null +++ b/tools/testing/selftests/sched/deadline/replenish_bug.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SCHED_DEADLINE ENQUEUE_REPLENISH Bug Test + * + * Reproduces the scenario where: + * 1. Task B (DEADLINE, short deadline) holds a PI mutex + * 2. Task A (DEADLINE, long deadline) blocks on Task B's mutex + * 3. Task B doesn't inherit from Task A (B has shorter deadline = higher priority) + * 4. sched_setscheduler() changes Task B from DEADLINE to IDLE + * 5. Task B should now inherit DEADLINE from Task A with ENQUEUE_REPLENISH + * + * Without the fix, ENQUEUE_REPLENISH flag is missing, causing: + * "DL de-boosted task PID X: REPLENISH flag missing" + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <sched.h> +#include <errno.h> +#include <stdint.h> +#include <time.h> +#include <signal.h> +#include "dl_test.h" +#include "dl_util.h" + +/* Thread context for the test */ +struct replenish_test_ctx { + pthread_mutex_t pi_mutex; + pthread_barrier_t barrier; + pthread_t holder; + pthread_t waiter; + volatile int holder_ready; + volatile int waiter_blocked; + volatile int test_done; + volatile int timeout_occurred; + volatile pid_t holder_tid; + volatile pid_t waiter_tid; +}; + +/* Timeout handler */ +static void timeout_handler(int sig) +{ + printf("\n\n!!! 
TIMEOUT !!!\n"); + printf("Test appears to have hung - likely due to the bug being triggered!\n"); + printf("This indicates the ENQUEUE_REPLENISH bug corrupted bandwidth accounting.\n"); + printf("\nCheck kernel log:\n"); + printf(" sudo dmesg | tail -50\n"); + printf("\nLook for:\n"); + printf(" 'REPLENISH flag missing'\n"); + printf(" 'dl_runtime_exceeded' or bandwidth warnings\n"); +} + +static void print_sched_info(const char *label, pid_t tid) +{ + struct sched_attr attr = {0}; + + if (dl_get_sched_attr(tid, &attr) == 0) { + printf("[%s] TID %d: policy=%u prio=%d", + label, tid, attr.sched_policy, attr.sched_priority); + if (attr.sched_policy == SCHED_DEADLINE) { + printf(" runtime=%llu deadline=%llu period=%llu", + (unsigned long long)attr.sched_runtime, + (unsigned long long)attr.sched_deadline, + (unsigned long long)attr.sched_period); + } + printf("\n"); + } +} + +static int set_sched_idle(pid_t tid) +{ + struct sched_param param = {0}; + return sched_setscheduler(tid, SCHED_IDLE, &param); +} + +/* + * Thread B: DEADLINE task (SHORT deadline) that holds the PI mutex + * This will be setscheduled to IDLE, triggering the bug + */ +static void *holder_thread(void *arg) +{ + struct replenish_test_ctx *ctx = arg; + + ctx->holder_tid = gettid(); + printf("\n=== HOLDER (Task B) thread started (TID %d) ===\n", + ctx->holder_tid); + + /* Set to DEADLINE with a SHORT deadline (high priority) */ + if (dl_set_sched_attr(ctx->holder_tid, dl_ms_to_ns(5), + dl_ms_to_ns(30), dl_ms_to_ns(60)) < 0) { + perror("holder: dl_set_sched_attr"); + return NULL; + } + + print_sched_info("HOLDER-INIT", ctx->holder_tid); + + /* Lock the mutex */ + pthread_mutex_lock(&ctx->pi_mutex); + printf("[HOLDER] TID %d: Locked PI mutex\n", ctx->holder_tid); + + /* Signal we're ready */ + ctx->holder_ready = 1; + + /* Wait at barrier */ + pthread_barrier_wait(&ctx->barrier); + + /* Keep holding the mutex while the waiter blocks and the holder gets setscheduled */ + while (!ctx->test_done) + usleep(10000); /* 10ms 
*/ + + printf("[HOLDER] TID %d: Unlocking PI mutex\n", ctx->holder_tid); + pthread_mutex_unlock(&ctx->pi_mutex); + + printf("[HOLDER] TID %d: Exiting\n", ctx->holder_tid); + return NULL; +} + +/* + * Thread A: DEADLINE task (LONG deadline) that will block on the mutex + * This is the pi_task that holder will inherit from after setscheduler + */ +static void *waiter_thread(void *arg) +{ + struct replenish_test_ctx *ctx = arg; + + ctx->waiter_tid = gettid(); + printf("\n=== WAITER (Task A) thread started (TID %d) ===\n", + ctx->waiter_tid); + + /* Set to DEADLINE with a LONG deadline (low priority) */ + if (dl_set_sched_attr(ctx->waiter_tid, dl_ms_to_ns(10), + dl_ms_to_ns(50), dl_ms_to_ns(100)) < 0) { + perror("waiter: dl_set_sched_attr"); + return NULL; + } + + print_sched_info("WAITER-INIT", ctx->waiter_tid); + + /* Wait for holder to lock the mutex */ + while (!ctx->holder_ready) + usleep(1000); + + /* Wait at barrier */ + pthread_barrier_wait(&ctx->barrier); + + printf("[WAITER] TID %d: Attempting to lock PI mutex (will block)...\n", + ctx->waiter_tid); + + /* This will block because holder has the lock */ + ctx->waiter_blocked = 1; + pthread_mutex_lock(&ctx->pi_mutex); + + /* Eventually we get the lock */ + printf("[WAITER] TID %d: Acquired PI mutex\n", ctx->waiter_tid); + print_sched_info("WAITER-AFTER", ctx->waiter_tid); + + pthread_mutex_unlock(&ctx->pi_mutex); + printf("[WAITER] TID %d: Unlocked PI mutex\n", ctx->waiter_tid); + printf("[WAITER] TID %d: Exiting\n", ctx->waiter_tid); + + return NULL; +} + +/* + * Test: DEADLINE ENQUEUE_REPLENISH Bug + * + * Verifies that when a SCHED_DEADLINE task holding a PI mutex is changed + * to SCHED_IDLE while a lower-priority DEADLINE task is blocked on that + * mutex, the ENQUEUE_REPLENISH flag is correctly set during PI boosting. 
+ */ +static enum dl_test_status test_replenish_bug_run(void *arg) +{ + struct replenish_test_ctx *ctx = arg; + struct sigaction sa; + + printf("======================================\n"); + printf("DEADLINE ENQUEUE_REPLENISH Bug Test\n"); + printf("======================================\n"); + printf("Timeout: 5 seconds\n"); + printf("\nThis test reproduces the scenario where:\n"); + printf("1. Task B (DEADLINE, short deadline) holds a PI mutex\n"); + printf("2. Task A (DEADLINE, long deadline) blocks on Task B's mutex\n"); + printf("3. Task B doesn't inherit from A (B has higher priority)\n"); + printf("4. Task B gets setscheduled to SCHED_IDLE (while A still blocked)\n"); + printf("5. Task B should now inherit from A with ENQUEUE_REPLENISH\n"); + printf("\nWithout fix: Missing ENQUEUE_REPLENISH flag causes WARNING\n"); + printf("\nCheck dmesg for:\n"); + printf(" 'DL de-boosted task PID X: REPLENISH flag missing'\n"); + printf("\nNOTE: If test hangs and times out, the bug was triggered!\n"); + printf("======================================\n\n"); + + /* Set up timeout handler */ + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = timeout_handler; + sigaction(SIGALRM, &sa, NULL); + + /* Set timeout (5 seconds) */ + alarm(5); + + /* Initialize barrier for 2 threads */ + DL_FAIL_IF(pthread_barrier_init(&ctx->barrier, NULL, 2) != 0, + "pthread_barrier_init failed"); + + /* Create holder thread (will lock mutex) */ + if (pthread_create(&ctx->holder, NULL, holder_thread, ctx) != 0) { + pthread_barrier_destroy(&ctx->barrier); + DL_FAIL("pthread_create holder failed: %s", strerror(errno)); + } + + /* Create waiter thread (will block on mutex) */ + if (pthread_create(&ctx->waiter, NULL, waiter_thread, ctx) != 0) { + pthread_barrier_destroy(&ctx->barrier); + DL_FAIL("pthread_create waiter failed: %s", strerror(errno)); + } + + /* Give threads time to start */ + sleep(1); + + /* Wait for waiter to block on the mutex */ + printf("\n[MAIN] Waiting for waiter to block on 
mutex...\n"); + while (!ctx->waiter_blocked) + usleep(1000); + + /* Give it a moment to actually block */ + usleep(50000); /* 50ms */ + + printf("\n[MAIN] Holder TID: %d\n", ctx->holder_tid); + print_sched_info("HOLDER-HOLDING", ctx->holder_tid); + + /* + * THE BUG TRIGGER: + * Holder (Task B) is DEADLINE with short deadline (high priority). + * Waiter (Task A) is DEADLINE with long deadline (low priority), blocked. + * Holder didn't inherit from waiter (holder has higher priority). + * Now change HOLDER from DEADLINE to SCHED_IDLE. + * Holder should inherit DEADLINE from waiter with ENQUEUE_REPLENISH, + * but without the fix, it doesn't. + */ + printf("\n[MAIN] *** Changing HOLDER (Task B) from SCHED_DEADLINE to SCHED_IDLE ***\n"); + printf("[MAIN] *** This triggers the bug! ***\n"); + + if (set_sched_idle(ctx->holder_tid) < 0) { + ctx->test_done = 1; + pthread_join(ctx->holder, NULL); + pthread_join(ctx->waiter, NULL); + pthread_barrier_destroy(&ctx->barrier); + DL_FAIL("set_sched_idle failed: %s", strerror(errno)); + } + + printf("[MAIN] Successfully changed holder to SCHED_IDLE\n"); + print_sched_info("HOLDER-SETSCHEDULED", ctx->holder_tid); + + /* Let the scenario play out */ + usleep(100000); /* 100ms */ + + /* Signal threads to finish */ + ctx->test_done = 1; + + /* Wait for threads */ + pthread_join(ctx->holder, NULL); + pthread_join(ctx->waiter, NULL); + + /* Cancel the alarm - we completed successfully */ + alarm(0); + + pthread_barrier_destroy(&ctx->barrier); + + DL_FAIL_IF(ctx->timeout_occurred, "Test timed out - bug was triggered!"); + + printf("\n======================================\n"); + printf("Test completed successfully!\n"); + printf("======================================\n"); + printf("\nNo timeout occurred - fix appears to be working.\n"); + printf("\nCheck kernel log:\n"); + printf(" sudo dmesg | tail -50\n"); + printf("\nLook for:\n"); + printf(" 'DL de-boosted task PID X: REPLENISH flag missing'\n"); + printf(" 'dl_runtime_exceeded' or 
bandwidth warnings\n"); + printf("\n"); + + return DL_TEST_PASS; +} + +static enum dl_test_status test_replenish_bug_setup(void **ctx_ptr) +{ + struct replenish_test_ctx *ctx; + pthread_mutexattr_t attr; + + ctx = calloc(1, sizeof(*ctx)); + DL_FAIL_IF(!ctx, "Failed to allocate test context"); + + /* Initialize PI mutex */ + if (pthread_mutexattr_init(&attr) != 0) { + free(ctx); + DL_FAIL("pthread_mutexattr_init failed"); + } + + if (pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT) != 0) { + pthread_mutexattr_destroy(&attr); + free(ctx); + DL_FAIL("pthread_mutexattr_setprotocol failed"); + } + + if (pthread_mutex_init(&ctx->pi_mutex, &attr) != 0) { + pthread_mutexattr_destroy(&attr); + free(ctx); + DL_FAIL("pthread_mutex_init failed"); + } + + pthread_mutexattr_destroy(&attr); + + *ctx_ptr = ctx; + return DL_TEST_PASS; +} + +static void test_replenish_bug_cleanup(void *arg) +{ + struct replenish_test_ctx *ctx = arg; + + if (ctx) { + pthread_mutex_destroy(&ctx->pi_mutex); + free(ctx); + } +} + +static struct dl_test test_replenish_bug = { + .name = "replenish_bug", + .description = "Verify ENQUEUE_REPLENISH flag is set during PI boosting after setscheduler", + .setup = test_replenish_bug_setup, + .run = test_replenish_bug_run, + .cleanup = test_replenish_bug_cleanup, +}; +REGISTER_DL_TEST(&test_replenish_bug); -- 2.53.0

