Stefan,

On Tue, 11 Dec 2018, Stefan Liebler wrote:
> does this also handle the ESRCH returned by
> attach_to_pi_owner(...)
> {...
>       if (!pid)
>               return -ESRCH;
>       p = find_get_task_by_vpid(pid);
>       if (!p)
>               return -ESRCH;
> ...
> 
> I think pid should never be zero when attach_to_pi_owner is called.

Yeah, I just checked again. It's a paranoid check.

> But it can happen that p is null? At least I traced the "return -ESRCH" with
> the 4.17 kernel. Unfortunately both returns were done by the same instruction
> address.

Yes, you are right. We need the same sanity check for that part. Updated
patch below.

Now I "just" have to come up with a cure for that livelock thing ....

Thanks,

        tglx

8<--------------
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1148,11 +1148,65 @@ static int attach_to_pi_state(u32 __user
        return ret;
 }
 
+static int handle_exit_race(u32 __user *uaddr, u32 uval,
+                           struct task_struct *tsk)
+{
+       u32 uval2;
+
+       /*
+        * If PF_EXITPIDONE is not yet set, then try again.
+        */
+       if (tsk && !(tsk->flags & PF_EXITPIDONE))
+               return -EAGAIN;
+
+       /*
+        * Reread the user space value to handle the following situation:
+        *
+        * CPU0                         CPU1
+        *
+        * sys_exit()                   sys_futex()
+        *  do_exit()                    futex_lock_pi()
+        *                                futex_lock_pi_atomic()
+        *   exit_signals(tsk)              No waiters:
+        *    tsk->flags |= PF_EXITING;     *uaddr == 0x00000PID
+        *  mm_release(tsk)                 Set waiter bit
+        *   exit_robust_list(tsk) {        *uaddr = 0x80000PID;
+        *      Set owner died              attach_to_pi_owner() {
+        *    *uaddr = 0xC0000000;           tsk = get_task(PID);
+        *   }                               if (!tsk->flags & PF_EXITING) {
+        *  ...                                attach();
+        *  tsk->flags |= PF_EXITPIDONE;     } else {
+        *                                     if (!(tsk->flags & 
PF_EXITPIDONE))
+        *                                       return -EAGAIN;
+        *                                     return -ESRCH; <--- FAIL
+        *                                   }
+        *
+        * Returning ESRCH unconditionally is wrong here because the
+        * user space value has been changed by the exiting task.
+        *
+        * The same logic applies to the case where the exiting task is
+        * already gone.
+        */
+       if (get_futex_value_locked(&uval2, uaddr))
+               return -EFAULT;
+
+       /* If the user space value has changed, try again. */
+       if (uval2 != uval)
+               return -EAGAIN;
+
+       /*
+        * The exiting task did not have a robust list, the robust list was
+        * corrupted or the user space value in *uaddr is simply bogus.
+        * Give up and tell user space.
+        */
+       return -ESRCH;
+}
+
 /*
  * Lookup the task for the TID provided from user space and attach to
  * it after doing proper sanity checks.
  */
-static int attach_to_pi_owner(u32 uval, union futex_key *key,
+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key 
*key,
                              struct futex_pi_state **ps)
 {
        pid_t pid = uval & FUTEX_TID_MASK;
@@ -1162,12 +1216,15 @@ static int attach_to_pi_owner(u32 uval,
        /*
         * We are the first waiter - try to look up the real owner and attach
         * the new pi_state to it, but bail out when TID = 0 [1]
+        *
+        * The !pid check is paranoid. None of the call sites should end up
+        * with pid == 0, but better safe than sorry. Let the caller retry
         */
        if (!pid)
-               return -ESRCH;
+               return -EAGAIN;
        p = find_get_task_by_vpid(pid);
        if (!p)
-               return -ESRCH;
+               return handle_exit_race(uaddr, uval, NULL);
 
        if (unlikely(p->flags & PF_KTHREAD)) {
                put_task_struct(p);
@@ -1187,7 +1244,7 @@ static int attach_to_pi_owner(u32 uval,
                 * set, we know that the task has finished the
                 * cleanup:
                 */
-               int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+               int ret = handle_exit_race(uaddr, uval, p);
 
                raw_spin_unlock_irq(&p->pi_lock);
                put_task_struct(p);
@@ -1244,7 +1301,7 @@ static int lookup_pi_state(u32 __user *u
         * We are the first waiter - try to look up the owner based on
         * @uval and attach to it.
         */
-       return attach_to_pi_owner(uval, key, ps);
+       return attach_to_pi_owner(uaddr, uval, key, ps);
 }
 
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1352,7 +1409,7 @@ static int futex_lock_pi_atomic(u32 __us
         * attach to the owner. If that fails, no harm done, we only
         * set the FUTEX_WAITERS bit in the user space variable.
         */
-       return attach_to_pi_owner(uval, key, ps);
+       return attach_to_pi_owner(uaddr, newval, key, ps);
 }
 
 /**

Reply via email to