From: Peter W. Morreale <[EMAIL PROTECTED]>

This patch adds the adaptive spin lock busywait to rtmutexes.  It adds
a new tunable: rtmutex_timeout, which is the companion to the
rtlock_timeout tunable.

Signed-off-by: Peter W. Morreale <[EMAIL PROTECTED]>
---

 kernel/Kconfig.preempt    |   37 +++++++++++++++++++++++++++++++++++++
 kernel/rtmutex.c          |   44 ++++++++++++++++++++++++++------------------
 kernel/rtmutex_adaptive.h |   32 ++++++++++++++++++++++++++++++--
 kernel/sysctl.c           |   10 ++++++++++
 4 files changed, 103 insertions(+), 20 deletions(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index eebec19..d2b0daa 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -223,6 +223,43 @@ config RTLOCK_DELAY
         tunable at runtime via a sysctl.  A setting of 0 (zero) disables
         the adaptive algorithm entirely.
 
+config ADAPTIVE_RTMUTEX
+        bool "Adaptive real-time mutexes"
+        default y
+        depends on ADAPTIVE_RTLOCK
+        help
+         This option adds the adaptive rtlock spin/sleep algorithm to
+         rtmutexes.  In rtlocks, a significant gain in throughput
+         can be seen by allowing rtlocks to spin for a distinct
+         amount of time prior to going to sleep for deadlock avoidance.
+ 
+         Typically, mutexes are used when a critical section may need to
+         sleep due to a blocking operation.  In the event the critical 
+        section does not need to sleep, an additional gain in throughput 
+        can be seen by avoiding the extra overhead of sleeping.
+ 
+         This option alters the rtmutex code to use an adaptive
+         spin/sleep algorithm.  It will spin unless it determines it must
+         sleep to avoid deadlock.  This offers a best of both worlds
+         solution since we achieve both high-throughput and low-latency.
+ 
+         If unsure, say Y
+ 
+config RTMUTEX_DELAY
+        int "Default delay (in loops) for adaptive mutexes"
+        range 0 10000000
+        depends on ADAPTIVE_RTMUTEX
+        default "3000"
+        help
+         This allows you to specify the maximum delay a task will use
+        to wait for a rt mutex before going to sleep.  Note that
+        although the delay is implemented as a preemptable loop, tasks
+        of like priority cannot preempt each other and this setting can
+        result in increased latencies.
+        
+         The value is tunable at runtime via a sysctl.  A setting of 0
+        (zero) disables the adaptive algorithm entirely.
+
 config SPINLOCK_BKL
        bool "Old-Style Big Kernel Lock"
        depends on (PREEMPT || SMP) && !PREEMPT_RT
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 4a7423f..a7ed7b2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -24,6 +24,10 @@
 int rtlock_timeout __read_mostly = CONFIG_RTLOCK_DELAY;
 #endif
 
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+int rtmutex_timeout __read_mostly = CONFIG_RTMUTEX_DELAY;
+#endif
+
 /*
  * lock->owner state tracking:
  *
@@ -521,17 +525,16 @@ static void wakeup_next_waiter(struct rt_mutex *lock, int 
savestate)
         * Do the wakeup before the ownership change to give any spinning
         * waiter grantees a headstart over the other threads that will
         * trigger once owner changes.
+        *
+        * This may appear to be a race, but the barriers close the
+        * window.
         */
-       if (!savestate)
-               wake_up_process(pendowner);
-       else {
-               smp_mb();
-               /*
-                * This may appear to be a race, but the barriers close the
-                * window.
-                */
-               if ((pendowner->state != TASK_RUNNING)
-                   && (pendowner->state != TASK_RUNNING_MUTEX))
+       smp_mb();
+       if ((pendowner->state != TASK_RUNNING)
+           && (pendowner->state != TASK_RUNNING_MUTEX)) {
+               if (!savestate)
+                       wake_up_process(pendowner);
+               else
                        wake_up_process_mutex(pendowner);
        }
 
@@ -764,7 +767,7 @@ rt_spin_lock_slowlock(struct rt_mutex *lock)
                debug_rt_mutex_print_deadlock(&waiter);
 
                /* adaptive_wait() returns 1 if we need to sleep */
-               if (adaptive_wait(lock, &waiter, &adaptive)) {
+               if (adaptive_wait(lock, 0, &waiter, &adaptive)) {
                        update_current(TASK_UNINTERRUPTIBLE, &saved_state);
                        if (waiter.task)
                                schedule_rt_mutex(lock);
@@ -975,6 +978,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
        int ret = 0, saved_lock_depth = -1;
        struct rt_mutex_waiter waiter;
        unsigned long flags;
+       DECLARE_ADAPTIVE_MUTEX_WAITER(adaptive);
 
        debug_rt_mutex_init_waiter(&waiter);
        waiter.task = NULL;
@@ -995,8 +999,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
        if (unlikely(current->lock_depth >= 0))
                saved_lock_depth = rt_release_bkl(lock, flags);
 
-       set_current_state(state);
-
        /* Setup the timer, when timeout != NULL */
        if (unlikely(timeout))
                hrtimer_start(&timeout->timer, timeout->timer.expires,
@@ -1049,6 +1051,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
                        if (unlikely(ret))
                                break;
                }
+
+               mutex_prepare_adaptive_wait(lock, &adaptive);
+
                saved_flags = current->flags & PF_NOSCHED;
                current->flags &= ~PF_NOSCHED;
 
@@ -1056,17 +1061,20 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
                debug_rt_mutex_print_deadlock(&waiter);
 
-               if (waiter.task)
-                       schedule_rt_mutex(lock);
+               if (mutex_adaptive_wait(lock,
+                                       (state == TASK_INTERRUPTIBLE),
+                                       &waiter, &adaptive)) {
+                       set_current_state(state);
+                       if (waiter.task)
+                               schedule_rt_mutex(lock);
+                       set_current_state(TASK_RUNNING);
+               }
 
                spin_lock_irq(&lock->wait_lock);
 
                current->flags |= saved_flags;
-               set_current_state(state);
        }
 
-       set_current_state(TASK_RUNNING);
-
        if (unlikely(waiter.task))
                remove_waiter(lock, &waiter, flags);
 
diff --git a/kernel/rtmutex_adaptive.h b/kernel/rtmutex_adaptive.h
index b7e282b..72f8def 100644
--- a/kernel/rtmutex_adaptive.h
+++ b/kernel/rtmutex_adaptive.h
@@ -56,7 +56,8 @@ struct adaptive_waiter {
  *
  */
 static inline int
-adaptive_wait(struct rt_mutex *lock, struct rt_mutex_waiter *waiter,
+adaptive_wait(struct rt_mutex *lock, int interruptible,
+             struct rt_mutex_waiter *waiter,
              struct adaptive_waiter *adaptive)
 {
        int sleep = 0;
@@ -77,6 +78,14 @@ adaptive_wait(struct rt_mutex *lock, struct rt_mutex_waiter 
*waiter,
                if (adaptive->owner != rt_mutex_owner(lock))
                        break;
 
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+               /*
+                * Mutexes may need to check for signals...
+                */
+               if (interruptible && signal_pending(current))
+                       break;
+#endif
+
                /*
                 * If we got here, presumably the lock ownership is still
                 * current.  We will use it to our advantage to be able to
@@ -132,10 +141,29 @@ extern int rtlock_timeout;
 
 #define DECLARE_ADAPTIVE_WAITER(name)
 
-#define adaptive_wait(lock, waiter, busy) 1
+#define adaptive_wait(lock, intr, waiter, busy) 1
 #define prepare_adaptive_wait(lock, busy) {}
 
 #endif /* CONFIG_ADAPTIVE_RTLOCK */
 
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+
+#define mutex_adaptive_wait         adaptive_wait
+#define mutex_prepare_adaptive_wait prepare_adaptive_wait
+
+extern int rtmutex_timeout;
+
+#define DECLARE_ADAPTIVE_MUTEX_WAITER(name) \
+     struct adaptive_waiter name = { .owner = NULL,               \
+                                     .timeout = rtmutex_timeout, }
+
+#else
+
+#define DECLARE_ADAPTIVE_MUTEX_WAITER(name)
+
+#define mutex_adaptive_wait(lock, intr, waiter, busy) 1
+#define mutex_prepare_adaptive_wait(lock, busy) {}
+
+#endif /* CONFIG_ADAPTIVE_RTMUTEX */
 
 #endif /* __KERNEL_RTMUTEX_ADAPTIVE_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 36259e4..3465af2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -976,6 +976,16 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dointvec,
        },
 #endif
+#ifdef CONFIG_ADAPTIVE_RTMUTEX
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "rtmutex_timeout",
+               .data           = &rtmutex_timeout,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+#endif
 #ifdef CONFIG_PROC_FS
        {
                .ctl_name       = CTL_UNNUMBERED,

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to