Peter Zijlstra writes: > The below compiles and boots, but is otherwise untested. Could you give > it a spin?
Thank you! Yes, I'll start a test now. -Gratian > --- > kernel/futex.c | 83 > +++++++++++++++++++++++++++++++++-------- > kernel/locking/rtmutex.c | 26 +++++++++---- > kernel/locking/rtmutex_common.h | 1 + > 3 files changed, 87 insertions(+), 23 deletions(-) > > diff --git a/kernel/futex.c b/kernel/futex.c > index 76ed5921117a..29ac5b64e7c7 100644 > --- a/kernel/futex.c > +++ b/kernel/futex.c > @@ -2294,21 +2294,17 @@ static void unqueue_me_pi(struct futex_q *q) > spin_unlock(q->lock_ptr); > } > > -/* > - * Fixup the pi_state owner with the new owner. > - * > - * Must be called with hash bucket lock held and mm->sem held for non > - * private futexes. > - */ > static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, > - struct task_struct *newowner) > + struct task_struct *argowner) > { > - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; > struct futex_pi_state *pi_state = q->pi_state; > u32 uval, uninitialized_var(curval), newval; > - struct task_struct *oldowner; > + struct task_struct *oldowner, *newowner; > + u32 newtid; > int ret; > > + lockdep_assert_held(q->lock_ptr); > + > raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); > > oldowner = pi_state->owner; > @@ -2317,11 +2313,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, > struct futex_q *q, > newtid |= FUTEX_OWNER_DIED; > > /* > - * We are here either because we stole the rtmutex from the > - * previous highest priority waiter or we are the highest priority > - * waiter but have failed to get the rtmutex the first time. > + * We are here because either: > + * > + * - we stole the lock and pi_state->owner needs updating to reflect > + * that (@argowner == current), > * > - * We have to replace the newowner TID in the user space variable. > + * or: > + * > + * - someone stole our lock and we need to fix things to point to the > + * new owner (@argowner == NULL). > + * > + * Either way, we have to replace the TID in the user space variable. 
> * This must be atomic as we have to preserve the owner died bit here. > * > * Note: We write the user space value _before_ changing the pi_state > @@ -2334,6 +2336,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, > struct futex_q *q, > * in the PID check in lookup_pi_state. > */ > retry: > + if (!argowner) { > + if (oldowner != current) { > + /* > + * We raced against a concurrent self; things are > + * already fixed up. Nothing to do. > + */ > + ret = 0; > + goto out_unlock; > + } > + > + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { > + /* We got the lock after all, nothing to fix. */ > + ret = 0; > + goto out_unlock; > + } > + > + /* > + * Since we just failed the trylock; there must be an owner. > + */ > + newowner = rt_mutex_owner(&pi_state->pi_mutex); > + BUG_ON(!newowner); > + } else { > + WARN_ON_ONCE(argowner != current); > + if (oldowner == current) { > + /* > + * We raced against a concurrent self; things are > + * already fixed up. Nothing to do. > + */ > + ret = 0; > + goto out_unlock; > + } > + newowner = argowner; > + } > + > + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; > + > if (get_futex_value_locked(&uval, uaddr)) > goto handle_fault; > > @@ -2434,15 +2472,28 @@ static int fixup_owner(u32 __user *uaddr, struct > futex_q *q, int locked) > * Got the lock. We might not be the anticipated owner if we > * did a lock-steal - fix up the PI-state in that case: > * > - * We can safely read pi_state->owner without holding wait_lock > - * because we now own the rt_mutex, only the owner will attempt > - * to change it. > + * Speculative pi_state->owner read (we don't hold wait_lock); > + * since we own the lock pi_state->owner == current is the > + * stable state, anything else needs more attention. > */ > if (q->pi_state->owner != current) > ret = fixup_pi_state_owner(uaddr, q, current); > goto out; > } > > + /* > + * If we didn't get the lock; check if anybody stole it from us. 
In > + * that case, we need to fix up the uval to point to them instead of > + * us, otherwise bad things happen. > + * > + * Another speculative read; pi_state->owner == current is unstable > + * but needs our attention. > + */ > + if (q->pi_state->owner == current) { > + ret = fixup_pi_state_owner(uaddr, q, NULL); > + goto out; > + } > + > /* > * Paranoia check. If we did not take the lock, then we should not be > * the owner of the rt_mutex. > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c > index 6f3dba6e4e9e..65cc0cb984e6 100644 > --- a/kernel/locking/rtmutex.c > +++ b/kernel/locking/rtmutex.c > @@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, > return ret; > } > > +static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) > +{ > + int ret = try_to_take_rt_mutex(lock, current, NULL); > + > + /* > + * try_to_take_rt_mutex() sets the lock waiters bit > + * unconditionally. Clean this up. > + */ > + fixup_rt_mutex_waiters(lock); > + > + return ret; > +} > + > /* > * Slow path try-lock function: > */ > @@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex > *lock) > */ > raw_spin_lock_irqsave(&lock->wait_lock, flags); > > - ret = try_to_take_rt_mutex(lock, current, NULL); > - > - /* > - * try_to_take_rt_mutex() sets the lock waiters bit > - * unconditionally. Clean this up. 
> - */ > - fixup_rt_mutex_waiters(lock); > + ret = __rt_mutex_slowtrylock(lock); > > raw_spin_unlock_irqrestore(&lock->wait_lock, flags); > > @@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex > *lock) > return rt_mutex_slowtrylock(lock); > } > > +int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) > +{ > + return __rt_mutex_slowtrylock(lock); > +} > + > /** > * rt_mutex_timed_lock - lock a rt_mutex interruptible > * the timeout structure is provided > diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h > index 124e98ca0b17..68686b3ec3c1 100644 > --- a/kernel/locking/rtmutex_common.h > +++ b/kernel/locking/rtmutex_common.h > @@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex > *lock, > struct rt_mutex_waiter *waiter); > > extern int rt_mutex_futex_trylock(struct rt_mutex *l); > +extern int __rt_mutex_futex_trylock(struct rt_mutex *l); > > extern void rt_mutex_futex_unlock(struct rt_mutex *lock); > extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,