From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Add regular waiters in stamp order. Waiters without a context are still
added in FIFO order, taking care not to starve them.
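
Here "stamp" is the sequence number handed out by ww_acquire_init(), so
"lower stamp" means older; the ordering test has to survive stamp
wraparound. A minimal standalone sketch of such a test (stamp_after() is
a made-up name for illustration, not the __ww_ctx_stamp_after() helper
that the diff below actually uses):

  /* Wraparound-safe: true iff stamp a was assigned after stamp b. */
  static bool stamp_after(unsigned long a, unsigned long b)
  {
          return (long)(a - b) > 0;
  }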

While adding our task as a waiter, back off if we detect that there is a
waiter with a lower stamp in front of us.
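
The -EDEADLK returned by that early back-off feeds the usual w/w caller
protocol: the younger context drops everything it holds and sleeps on the
contended lock before retrying. For reference, a sketch of that caller
side using the public ww_mutex API (lock_both() and demo_ww_class are
made-up names for illustration):

  #include <linux/kernel.h>       /* swap() */
  #include <linux/ww_mutex.h>

  static DEFINE_WW_CLASS(demo_ww_class);

  static void lock_both(struct ww_mutex *a, struct ww_mutex *b)
  {
          struct ww_acquire_ctx ctx;

          ww_acquire_init(&ctx, &demo_ww_class);

          /* The first lock of a fresh context cannot hit -EDEADLK. */
          ww_mutex_lock(a, &ctx);
          while (ww_mutex_lock(b, &ctx) == -EDEADLK) {
                  /*
                   * We are the younger context: drop what we hold,
                   * then sleep on the contended lock before retrying.
                   */
                  ww_mutex_unlock(a);
                  ww_mutex_lock_slow(b, &ctx);
                  swap(a, b);     /* b is now held; retake the other */
          }
          ww_acquire_done(&ctx);

          /* ... critical section ... */

          ww_mutex_unlock(a);
          ww_mutex_unlock(b);
          ww_acquire_fini(&ctx);
  }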

Make sure to call lock_contended() even when we back off early.

For w/w mutexes, being first in the wait list is only stable when taking the
lock without a context. Therefore, the purpose of the 'first' flag is split
into two: 'first' remains to indicate whether we want to spin optimistically,
while 'handoff' indicates that we should be prepared to accept a handoff.

For w/w locking with a context, we always accept handoffs after the first
schedule(), to handle the following sequence of events (a condensed loop
sketch follows the list):

1. Task #0 unlocks and hands off to Task #2 which is first in line
2. Task #1 adds itself in front of Task #2
3. Task #2 wakes up and must accept the handoff even though it is no longer
   first in line
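
Condensed from the wait loop in the diff below (no new logic), the
with-context case therefore does, on every iteration:

  /*
   * 'first' is re-derived each time around, because a waiter with a
   * lower stamp may have taken our spot at the head; 'handoff' stays
   * sticky after the first sleep so that a handoff granted while we
   * were briefly first is not lost.
   */
  first = __mutex_waiter_is_first(lock, &waiter);
  if (first)
          __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
  handoff = true;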

v2:
- rein in the indentation of __ww_mutex_add_waiter a bit
- set contending_lock in __ww_mutex_add_waiter (Chris Wilson)

v3:
- split 'first' into 'first' and 'handoff' to avoid moving the trylock calls
  around so much
- scan the wait_list in reverse order in __ww_mutex_add_waiter

Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Maarten Lankhorst <d...@mblankhorst.nl>
Cc: Daniel Vetter <dan...@ffwll.ch>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: dri-de...@lists.freedesktop.org
Signed-off-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
---
 include/linux/mutex.h  |  3 ++
 kernel/locking/mutex.c | 97 +++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index b97870f..118a3b6 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,6 +20,8 @@
 #include <linux/osq_lock.h>
 #include <linux/debug_locks.h>
 
+struct ww_acquire_ctx;
+
 /*
  * Simple, straightforward mutexes with strict semantics:
  *
@@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
 struct mutex_waiter {
        struct list_head        list;
        struct task_struct      *task;
+       struct ww_acquire_ctx   *ww_ctx;
 #ifdef CONFIG_DEBUG_MUTEXES
        void                    *magic;
 #endif
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 282c6de..5b1ca20 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -620,6 +620,52 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
        return 0;
 }
 
+static inline int __sched
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+                     struct mutex *lock,
+                     struct ww_acquire_ctx *ww_ctx)
+{
+       struct mutex_waiter *cur;
+       struct list_head *pos;
+
+       if (!ww_ctx) {
+               list_add_tail(&waiter->list, &lock->wait_list);
+               return 0;
+       }
+
+       /*
+        * Add the waiter before the first waiter with a higher stamp.
+        * Waiters without a context are skipped to avoid starving
+        * them.
+        */
+       pos = &lock->wait_list;
+       list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+               if (!cur->ww_ctx)
+                       continue;
+
+               if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+                       /* Back off immediately if necessary. */
+                       if (ww_ctx->acquired > 0) {
+#ifdef CONFIG_DEBUG_MUTEXES
+                               struct ww_mutex *ww;
+
+                               ww = container_of(lock, struct ww_mutex, base);
+                               DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
+                               ww_ctx->contending_lock = ww;
+#endif
+                               return -EDEADLK;
+                       }
+
+                       break;
+               }
+
+               pos = &cur->list;
+       }
+
+       list_add_tail(&waiter->list, pos);
+       return 0;
+}
+
 /*
  * Lock a mutex (possibly interruptible), slowpath:
  */
@@ -632,6 +678,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        struct mutex_waiter waiter;
        unsigned long flags;
        bool first = false;
+       bool handoff = false;
        struct ww_mutex *ww;
        int ret;
 
@@ -665,15 +712,25 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        debug_mutex_lock_common(lock, &waiter);
        debug_mutex_add_waiter(lock, &waiter, task);
 
-       /* add waiting tasks to the end of the waitqueue (FIFO): */
-       list_add_tail(&waiter.list, &lock->wait_list);
+       lock_contended(&lock->dep_map, ip);
+
+       if (!use_ww_ctx) {
+               /* add waiting tasks to the end of the waitqueue (FIFO): */
+               waiter.ww_ctx = NULL; /* skipped by the stamp-order scan */
+               list_add_tail(&waiter.list, &lock->wait_list);
+       } else {
+               /* Add in stamp order, backing off early if we must. */
+               waiter.ww_ctx = ww_ctx;
+               ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
+               if (ret)
+                       goto err_early_backoff;
+       }
+
        waiter.task = task;
 
        if (__mutex_waiter_is_first(lock, &waiter))
                __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
 
-       lock_contended(&lock->dep_map, ip);
-
        set_task_state(task, state);
        for (;;) {
                /*
@@ -682,7 +739,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * before testing the error conditions to make sure we pick up
                 * the handoff.
                 */
-               if (__mutex_trylock(lock, first))
+               if (__mutex_trylock(lock, handoff))
                        goto acquired;
 
                /*
@@ -711,13 +768,34 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * or we must see its unlock and acquire.
                 */
 
-               if (!first && __mutex_waiter_is_first(lock, &waiter)) {
-                       first = true;
+               if (use_ww_ctx && ww_ctx) {
+                       /*
+                        * Always re-check whether we're in first position. We
+                        * don't want to spin if another task with a lower
+                        * stamp has taken our position.
+                        *
+                        * We also may have to set the handoff flag again, if
+                        * our position at the head was temporarily taken away.
+                        */
+                       first = __mutex_waiter_is_first(lock, &waiter);
+
+                       if (first)
+                               __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
+
+                       /*
+                        * Always be prepared to accept a handoff after the
+                        * first wait, because we may have been the first
+                        * waiter during unlock.
+                        */
+                       handoff = true;
+               } else if (!first && __mutex_waiter_is_first(lock, &waiter)) {
+                       first = handoff = true;
                        __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
                }
 
-               if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
-                    __mutex_trylock(lock, first))
+               if ((first &&
+                    mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
+                   __mutex_trylock(lock, handoff))
                        break;
 
                spin_lock_mutex(&lock->wait_lock, flags);
@@ -746,6 +824,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 err:
        __set_task_state(task, TASK_RUNNING);
        mutex_remove_waiter(lock, &waiter, task);
+err_early_backoff:
        spin_unlock_mutex(&lock->wait_lock, flags);
        debug_mutex_free_waiter(&waiter);
        mutex_release(&lock->dep_map, 1, ip);
-- 
2.7.4
