On Wed, Feb 23, 2005 at 06:49:46PM +0000, Jamie Lokier wrote:
> Linus Torvalds wrote:
> > > I suggest putting it into futex.c, and make it an inline function
> > > which takes "u32 __user *".
> > 
> > Agreed, except we've traditionally just made it "int __user *".
> 
> The type signatures in futex.c are a bit mixed up - most places say
> "int __user *" but sys_futex() says "u32 __user *".  get_futex_key
> uses sizeof(u32) to check the address.

How's this? I went with get_val_no_fault(), since it isn't really a
get_user.*() any more (ptr being passed in), and no_paging is a little
misleading (not all faults are due to paging).


-----

Some futex functions call get_user() while holding mmap_sem for
reading. If get_user() faults, and another thread happens to be in mmap
(or somewhere else waiting on down_write() for the same semaphore),
then do_page_fault() will deadlock. Most architectures seem to be
exposed to this.

To avoid it, make sure the page is available. If not, release the
semaphore, fault it in and retry.

I also found another exposure by inspection, in mm/mempolicy.c; moving
some of the code around avoids the possible deadlock there.

Signed-off-by: Olof Johansson <[EMAIL PROTECTED]>


Index: linux-2.5/kernel/futex.c
===================================================================
--- linux-2.5.orig/kernel/futex.c       2005-02-21 16:09:38.000000000 -0600
+++ linux-2.5/kernel/futex.c    2005-02-23 13:10:16.000000000 -0600
@@ -258,6 +258,18 @@
        }
 }
 
+static inline int get_val_no_fault(int *dest, int __user *from)
+{
+       int ret;
+
+       inc_preempt_count();
+       ret = __copy_from_user_inatomic(dest, from, sizeof(int));
+       dec_preempt_count();
+       preempt_check_resched();
+
+       return ret;
+}
+
 /*
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
@@ -329,6 +341,7 @@
        int ret, drop_count = 0;
        unsigned int nqueued;
 
+ retry:
        down_read(&current->mm->mmap_sem);
 
        ret = get_futex_key(uaddr1, &key1);
@@ -355,9 +368,20 @@
                   before *uaddr1.  */
                smp_mb();
 
-               if (get_user(curval, (int __user *)uaddr1) != 0) {
-                       ret = -EFAULT;
-                       goto out;
+               ret = get_val_no_fault(&curval, (int __user *)uaddr1);
+
+               if (unlikely(ret)) {
+                       /* If we would have faulted, release mmap_sem, fault
+                        * it in and start all over again.
+                        */
+                       up_read(&current->mm->mmap_sem);
+
+                       ret = get_user(curval, (int __user *)uaddr1);
+
+                       if (!ret)
+                               goto retry;
+
+                       return ret;
                }
                if (curval != *valp) {
                        ret = -EAGAIN;
@@ -480,6 +504,7 @@
        int ret, curval;
        struct futex_q q;
 
+ retry:
        down_read(&current->mm->mmap_sem);
 
        ret = get_futex_key(uaddr, &q.key);
@@ -508,9 +533,23 @@
         * We hold the mmap semaphore, so the mapping cannot have changed
         * since we looked it up in get_futex_key.
         */
-       if (get_user(curval, (int __user *)uaddr) != 0) {
-               ret = -EFAULT;
-               goto out_unqueue;
+
+       ret = get_val_no_fault(&curval, (int __user *)uaddr);
+
+       if (unlikely(ret)) {
+               /* If we would have faulted, release mmap_sem, fault it in and
+                * start all over again.
+                */
+               up_read(&current->mm->mmap_sem);
+
+               if (!unqueue_me(&q)) /* There's a chance we got woken already */
+                       return 0;
+
+               ret = get_user(curval, (int __user *)uaddr);
+
+               if (!ret)
+                       goto retry;
+               return ret;
        }
        if (curval != val) {
                ret = -EWOULDBLOCK;
Index: linux-2.5/mm/mempolicy.c
===================================================================
--- linux-2.5.orig/mm/mempolicy.c       2005-02-04 00:27:40.000000000 -0600
+++ linux-2.5/mm/mempolicy.c    2005-02-23 12:53:22.000000000 -0600
@@ -524,9 +524,13 @@
        } else
                pval = pol->policy;
 
-       err = -EFAULT;
+       if (vma) {
+               up_read(&current->mm->mmap_sem);
+               vma = NULL;
+       }
+
        if (policy && put_user(pval, policy))
-               goto out;
+               return -EFAULT;
 
        err = 0;
        if (nmask) {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to