From: Peter W. Morreale <[EMAIL PROTECTED]>

This patch adds the adaptive spin busy-wait algorithm to rtmutexes. It adds
a new tunable, rtmutex_timeout, which is the companion to the existing
rtlock_timeout tunable.
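For illustration (not part of the patch itself): once CONFIG_ADAPTIVE_RTMUTEX
is enabled, the tunable is exposed through the sysctl entry added below as
/proc/sys/kernel/rtmutex_timeout (a plain integer, mode 0644), so for example
"echo 0 > /proc/sys/kernel/rtmutex_timeout" disables adaptive spinning at
runtime; the build-time default comes from CONFIG_RTMUTEX_DELAY.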
Signed-off-by: Peter W. Morreale <[EMAIL PROTECTED]>
---
 kernel/Kconfig.preempt    |   37 +++++++++++++++++++++++++++++++++++++
 kernel/rtmutex.c          |   44 ++++++++++++++++++++++++++------------------
 kernel/rtmutex_adaptive.h |   32 ++++++++++++++++++++++++++++++--
 kernel/sysctl.c           |   10 ++++++++++
 4 files changed, 103 insertions(+), 20 deletions(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index eebec19..d2b0daa 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -223,6 +223,43 @@ config RTLOCK_DELAY
 	  tunable at runtime via a sysctl.  A setting of 0 (zero) disables
 	  the adaptive algorithm entirely.
 
+config ADAPTIVE_RTMUTEX
+	bool "Adaptive real-time mutexes"
+	default y
+	depends on ADAPTIVE_RTLOCK
+	help
+	  This option adds the adaptive rtlock spin/sleep algorithm to
+	  rtmutexes.  In rtlocks, a significant gain in throughput can be
+	  seen by allowing rtlocks to spin for a distinct amount of time
+	  prior to going to sleep for deadlock avoidance.
+
+	  Typically, mutexes are used when a critical section may need to
+	  sleep due to a blocking operation.  In the event the critical
+	  section does not need to sleep, an additional gain in throughput
+	  can be seen by avoiding the extra overhead of sleeping.
+
+	  This option alters the rtmutex code to use an adaptive
+	  spin/sleep algorithm.  It will spin unless it determines it must
+	  sleep to avoid deadlock.  This offers a best-of-both-worlds
+	  solution, since we achieve both high throughput and low latency.
+
+	  If unsure, say Y.
+
+config RTMUTEX_DELAY
+	int "Default delay (in loops) for adaptive mutexes"
+	range 0 10000000
+	depends on ADAPTIVE_RTMUTEX
+	default "3000"
+	help
+	  This allows you to specify the maximum delay a task will use
+	  to wait for an rt mutex before going to sleep.  Note that
+	  although the delay is implemented as a preemptible loop, tasks
+	  of like priority cannot preempt each other, and this setting
+	  can result in increased latencies.
+
+	  The value is tunable at runtime via a sysctl.  A setting of 0
+	  (zero) disables the adaptive algorithm entirely.
+
 config SPINLOCK_BKL
 	bool "Old-Style Big Kernel Lock"
 	depends on (PREEMPT || SMP) && !PREEMPT_RT
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 4a7423f..a7ed7b2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -24,6 +24,10 @@
 int rtlock_timeout __read_mostly = CONFIG_RTLOCK_DELAY;
 #endif
 
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+int rtmutex_timeout __read_mostly = CONFIG_RTMUTEX_DELAY;
+#endif
+
 /*
  * lock->owner state tracking:
  *
@@ -521,17 +525,16 @@ static void wakeup_next_waiter(struct rt_mutex *lock, int savestate)
 	 * Do the wakeup before the ownership change to give any spinning
 	 * waiter grantees a headstart over the other threads that will
 	 * trigger once owner changes.
+	 *
+	 * This may appear to be a race, but the barriers close the
+	 * window.
 	 */
-	if (!savestate)
-		wake_up_process(pendowner);
-	else {
-		smp_mb();
-		/*
-		 * This may appear to be a race, but the barriers close the
-		 * window.
-		 */
-		if ((pendowner->state != TASK_RUNNING)
-		    && (pendowner->state != TASK_RUNNING_MUTEX))
+	smp_mb();
+	if ((pendowner->state != TASK_RUNNING)
+	    && (pendowner->state != TASK_RUNNING_MUTEX)) {
+		if (!savestate)
+			wake_up_process(pendowner);
+		else
 			wake_up_process_mutex(pendowner);
 	}
 
@@ -764,7 +767,7 @@ rt_spin_lock_slowlock(struct rt_mutex *lock)
 		debug_rt_mutex_print_deadlock(&waiter);
 
 		/* adaptive_wait() returns 1 if we need to sleep */
-		if (adaptive_wait(lock, &waiter, &adaptive)) {
+		if (adaptive_wait(lock, 0, &waiter, &adaptive)) {
 			update_current(TASK_UNINTERRUPTIBLE, &saved_state);
 			if (waiter.task)
 				schedule_rt_mutex(lock);
@@ -975,6 +978,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	int ret = 0, saved_lock_depth = -1;
 	struct rt_mutex_waiter waiter;
 	unsigned long flags;
+	DECLARE_ADAPTIVE_MUTEX_WAITER(adaptive);
 
 	debug_rt_mutex_init_waiter(&waiter);
 	waiter.task = NULL;
@@ -995,8 +999,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	if (unlikely(current->lock_depth >= 0))
 		saved_lock_depth = rt_release_bkl(lock, flags);
 
-	set_current_state(state);
-
 	/* Setup the timer, when timeout != NULL */
 	if (unlikely(timeout))
 		hrtimer_start(&timeout->timer, timeout->timer.expires,
@@ -1049,6 +1051,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 			if (unlikely(ret))
 				break;
 		}
+
+		mutex_prepare_adaptive_wait(lock, &adaptive);
+
 		saved_flags = current->flags & PF_NOSCHED;
 		current->flags &= ~PF_NOSCHED;
@@ -1056,17 +1061,20 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
 		debug_rt_mutex_print_deadlock(&waiter);
 
-		if (waiter.task)
-			schedule_rt_mutex(lock);
+		if (mutex_adaptive_wait(lock,
+					(state == TASK_INTERRUPTIBLE),
+					&waiter, &adaptive)) {
+			set_current_state(state);
+			if (waiter.task)
+				schedule_rt_mutex(lock);
+			set_current_state(TASK_RUNNING);
+		}
 
 		spin_lock_irq(&lock->wait_lock);
 		current->flags |= saved_flags;
-		set_current_state(state);
 	}
 
-	set_current_state(TASK_RUNNING);
-
 	if (unlikely(waiter.task))
 		remove_waiter(lock, &waiter, flags);
 
diff --git a/kernel/rtmutex_adaptive.h b/kernel/rtmutex_adaptive.h
index b7e282b..72f8def 100644
--- a/kernel/rtmutex_adaptive.h
+++ b/kernel/rtmutex_adaptive.h
@@ -56,7 +56,8 @@ struct adaptive_waiter {
  *
 */
 static inline int
-adaptive_wait(struct rt_mutex *lock, struct rt_mutex_waiter *waiter,
+adaptive_wait(struct rt_mutex *lock, int interruptible,
+	      struct rt_mutex_waiter *waiter,
 	      struct adaptive_waiter *adaptive)
 {
 	int sleep = 0;
@@ -77,6 +78,14 @@ adaptive_wait(struct rt_mutex *lock, struct rt_mutex_waiter *waiter,
 		if (adaptive->owner != rt_mutex_owner(lock))
 			break;
 
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+		/*
+		 * Mutexes may need to check for signals...
+		 */
+		if (interruptible && signal_pending(current))
+			break;
+#endif
+
 		/*
 		 * If we got here, presumably the lock ownership is still
 		 * current.  We will use it to our advantage to be able to
@@ -132,10 +141,29 @@ extern int rtlock_timeout;
 
 #define DECLARE_ADAPTIVE_WAITER(name)
 
-#define adaptive_wait(lock, waiter, busy) 1
+#define adaptive_wait(lock, intr, waiter, busy) 1
 #define prepare_adaptive_wait(lock, busy) {}
 
 #endif /* CONFIG_ADAPTIVE_RTLOCK */
 
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+
+#define mutex_adaptive_wait adaptive_wait
+#define mutex_prepare_adaptive_wait prepare_adaptive_wait
+
+extern int rtmutex_timeout;
+
+#define DECLARE_ADAPTIVE_MUTEX_WAITER(name) \
+	struct adaptive_waiter name = { .owner = NULL,              \
+					.timeout = rtmutex_timeout, }
+
+#else
+
+#define DECLARE_ADAPTIVE_MUTEX_WAITER(name)
+
+#define mutex_adaptive_wait(lock, intr, waiter, busy) 1
+#define mutex_prepare_adaptive_wait(lock, busy) {}
+
+#endif /* CONFIG_ADAPTIVE_RTMUTEX */
+
 #endif /* __KERNEL_RTMUTEX_ADAPTIVE_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 36259e4..3465af2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -976,6 +976,16 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "rtmutex_timeout",
+		.data		= &rtmutex_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_PROC_FS
 	{
 		.ctl_name	= CTL_UNNUMBERED,
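As an aside for reviewers reading the diff without the full
rtmutex_adaptive.h in front of them, below is a compact userspace model of
the spin/sleep decision that mutex_adaptive_wait() makes for rtmutexes. It
is a sketch only: the mock task/lock types, the sigpending field,
current_task, and main() are invented for illustration, and the real
adaptive_wait() also rechecks waiter->task and the owner's run state, which
the model omits.

#include <stdio.h>

struct task { int sigpending; };                 /* mock task_struct */
struct rt_mutex { struct task *owner; };         /* mock rt_mutex */
struct adaptive_waiter { struct task *owner; int timeout; };

static struct task *current_task;                /* mock of "current" */

/*
 * Return 1 if the waiter must genuinely sleep (spin budget exhausted),
 * 0 if it should reloop in the slow path and retry, either because the
 * lock changed owners underneath us, or (new in this patch) because an
 * interruptible waiter has a pending signal the slow path must handle.
 */
static int mutex_adaptive_wait(struct rt_mutex *lock, int interruptible,
                               struct adaptive_waiter *adaptive)
{
	int sleep = 0;

	for (;;) {
		/* Lock released or stolen: go back and retry the acquire. */
		if (adaptive->owner != lock->owner)
			break;
		/* New in this patch: interruptible waiters notice signals. */
		if (interruptible && current_task->sigpending)
			break;
		/* Spin budget (rtmutex_timeout) exhausted: really sleep. */
		if (--adaptive->timeout <= 0) {
			sleep = 1;
			break;
		}
		/* cpu_relax() would sit here in the kernel. */
	}
	return sleep;
}

int main(void)
{
	struct task owner = { 0 }, waiter = { 0 };
	struct rt_mutex lock = { &owner };
	struct adaptive_waiter aw = { &owner, 3000 };

	current_task = &waiter;
	printf("no signal: sleep=%d\n", mutex_adaptive_wait(&lock, 1, &aw));

	waiter.sigpending = 1;
	aw.timeout = 3000;
	printf("signal:    sleep=%d\n", mutex_adaptive_wait(&lock, 1, &aw));
	return 0;
}

The second call shows why the interruptible flag is threaded through: a
pending signal makes the waiter bail out of the spin immediately (returning
0 rather than sleeping), so the slow-path loop can observe the signal and
return -EINTR instead of burning the whole spin budget first.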