On Thu, Dec 07, 2017 at 11:45:16AM +0100, Peter Zijlstra wrote:
> diff --git a/kernel/futex.c b/kernel/futex.c
> index 76ed5921117a..8ad5221fbd84 100644
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -2303,14 +2303,35 @@ static void unqueue_me_pi(struct futex_q *q)
>  static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
>                               struct task_struct *newowner)
>  {
> -     u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
>       struct futex_pi_state *pi_state = q->pi_state;
>       u32 uval, uninitialized_var(curval), newval;
>       struct task_struct *oldowner;
> +     u32 newtid;
>       int ret;
>  
>       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
>  
> +     if (!newowner) {
> +             if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
> +                     ret = 0;
> +                     goto out_unlock;
> +             }
> +
> +             newowner = rt_mutex_owner(&pi_state->pi_mutex);
> +             if (WARN_ON_ONCE(!newowner)) {
> +                     /*
> +                      * We just attempted a trylock; since that failed there
> +                      * must be an owner, right?
> +                      */
> +                     ret = -EFUCKED; /* XXX: check return paths */
> +                     goto out_unlock;
> +             }
> +
> +             /* OK we have a newowner, fixup uval */
> +     }
> +
> +     newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
> +
>       oldowner = pi_state->owner;
>       /* Owner died? */
>       if (!pi_state->owner)

OK, this is broken because it needs to be inside the retry label,
because we drop the locks when we take a fault. And if we drop the lock,
the rt_mutex owner can change etc..

> @@ -2443,6 +2464,16 @@ static int fixup_owner(u32 __user *uaddr, struct 
> futex_q *q, int locked)
>               goto out;
>       }
>  
> +     /*
> +      * If we didn't get the lock; check if nobody stole it from us.
> +      * In that case, we need to fix up the uval to point to them
> +      * instead of us, otherwise bad things happen.
> +      */
> +     if (q->pi_state->owner == current) {
> +             ret = fixup_pi_state_owner(uaddr, q, NULL);
> +             goto out;
> +     }

And this had me read the comment just above here that stated the
unlocked access of ->owner is fine, because only the rt_mutex owner
changes pi_state, except of course, this just changed that.

So now we need double check the state after we've taken the lock in
fixup_pi_state_owner().


The below compiles and boots, but is otherwise untested. Could you give
it a spin?

---
 kernel/futex.c                  | 83 +++++++++++++++++++++++++++++++++--------
 kernel/locking/rtmutex.c        | 26 +++++++++----
 kernel/locking/rtmutex_common.h |  1 +
 3 files changed, 87 insertions(+), 23 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 76ed5921117a..29ac5b64e7c7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2294,21 +2294,17 @@ static void unqueue_me_pi(struct futex_q *q)
        spin_unlock(q->lock_ptr);
 }
 
-/*
- * Fixup the pi_state owner with the new owner.
- *
- * Must be called with hash bucket lock held and mm->sem held for non
- * private futexes.
- */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-                               struct task_struct *newowner)
+                               struct task_struct *argowner)
 {
-       u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
        u32 uval, uninitialized_var(curval), newval;
-       struct task_struct *oldowner;
+       struct task_struct *oldowner, *newowner;
+       u32 newtid;
        int ret;
 
+       lockdep_assert_held(q->lock_ptr);
+
        raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 
        oldowner = pi_state->owner;
@@ -2317,11 +2313,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, 
struct futex_q *q,
                newtid |= FUTEX_OWNER_DIED;
 
        /*
-        * We are here either because we stole the rtmutex from the
-        * previous highest priority waiter or we are the highest priority
-        * waiter but have failed to get the rtmutex the first time.
+        * We are here because either:
+        *
+        *  - we stole the lock and pi_state->owner needs updating to reflect
+        *    that (@argowner == current),
         *
-        * We have to replace the newowner TID in the user space variable.
+        * or:
+        *
+        *  - someone stole our lock and we need to fix things to point to the
+        *    new owner (@argowner == NULL).
+        *
+        * Either way, we have to replace the TID in the user space variable.
         * This must be atomic as we have to preserve the owner died bit here.
         *
         * Note: We write the user space value _before_ changing the pi_state
@@ -2334,6 +2336,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, 
struct futex_q *q,
         * in the PID check in lookup_pi_state.
         */
 retry:
+       if (!argowner) {
+               if (oldowner != current) {
+                       /*
+                        * We raced against a concurrent self; things are
+                        * already fixed up. Nothing to do.
+                        */
+                       ret = 0;
+                       goto out_unlock;
+               }
+
+               if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
+                       /* We got the lock after all, nothing to fix. */
+                       ret = 0;
+                       goto out_unlock;
+               }
+
+               /*
+                * Since we just failed the trylock; there must be an owner.
+                */
+               newowner = rt_mutex_owner(&pi_state->pi_mutex);
+               BUG_ON(!newowner);
+       } else {
+               WARN_ON_ONCE(argowner != current);
+               if (oldowner == current) {
+                       /*
+                        * We raced against a concurrent self; things are
+                        * already fixed up. Nothing to do.
+                        */
+                       ret = 0;
+                       goto out_unlock;
+               }
+               newowner = argowner;
+       }
+
+       newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
+
        if (get_futex_value_locked(&uval, uaddr))
                goto handle_fault;
 
@@ -2434,15 +2472,28 @@ static int fixup_owner(u32 __user *uaddr, struct 
futex_q *q, int locked)
                 * Got the lock. We might not be the anticipated owner if we
                 * did a lock-steal - fix up the PI-state in that case:
                 *
-                * We can safely read pi_state->owner without holding wait_lock
-                * because we now own the rt_mutex, only the owner will attempt
-                * to change it.
+                * Speculative pi_state->owner read (we don't hold wait_lock);
+                * since we own the lock pi_state->owner == current is the
+                * stable state, anything else needs more attention.
                 */
                if (q->pi_state->owner != current)
                        ret = fixup_pi_state_owner(uaddr, q, current);
                goto out;
        }
 
+       /*
+        * If we didn't get the lock; check if anybody stole it from us. In
+        * that case, we need to fix up the uval to point to them instead of
+        * us, otherwise bad things happen. [10]
+        *
+        * Another speculative read; pi_state->owner == current is unstable
+        * but needs our attention.
+        */
+       if (q->pi_state->owner == current) {
+               ret = fixup_pi_state_owner(uaddr, q, NULL);
+               goto out;
+       }
+
        /*
         * Paranoia check. If we did not take the lock, then we should not be
         * the owner of the rt_mutex.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6f3dba6e4e9e..65cc0cb984e6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
        return ret;
 }
 
+static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
+{
+       int ret = try_to_take_rt_mutex(lock, current, NULL);
+
+       /*
+        * try_to_take_rt_mutex() sets the lock waiters bit
+        * unconditionally. Clean this up.
+        */
+       fixup_rt_mutex_waiters(lock);
+
+       return ret;
+}
+
 /*
  * Slow path try-lock function:
  */
@@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex 
*lock)
         */
        raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
-       ret = try_to_take_rt_mutex(lock, current, NULL);
-
-       /*
-        * try_to_take_rt_mutex() sets the lock waiters bit
-        * unconditionally. Clean this up.
-        */
-       fixup_rt_mutex_waiters(lock);
+       ret = __rt_mutex_slowtrylock(lock);
 
        raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
@@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
        return rt_mutex_slowtrylock(lock);
 }
 
+int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+{
+       return __rt_mutex_slowtrylock(lock);
+}
+
 /**
  * rt_mutex_timed_lock - lock a rt_mutex interruptible
  *                     the timeout structure is provided
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 124e98ca0b17..68686b3ec3c1 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex 
*lock,
                                 struct rt_mutex_waiter *waiter);
 
 extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
 
 extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
 extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,

Reply via email to