On Tue, 19 Jan 2016, Bueso wrote:
        /*
         * Private mappings are handled in a simple way.
         *
+        * If the futex key is stored on an anonymous page, then the associated
+        * object is the mm which is implicitly pinned by the calling process.
+        *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
         * the object not the particular process.
@@ -566,16 +592,61 @@ again:
                key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
+
+               get_futex_key_refs(key); /* implies MB (B) */
+
        } else {
+               struct inode *inode;
+
+               /*
+                * The associtated futex object in this case is the inode and
+                * the page->mapping must be traversed. Ordinarily this should
+                * be stabilised under page lock but it's not strictly
+                * necessary in this case as we just want to pin the inode, not
+                * update radix tree or anything like that.
+                *
+                * The RCU read lock is taken as the inode is finally freed
+                * under RCU. If the mapping still matches expectations then the
+                * mapping->host can be safely accessed as being a valid inode.
+                */
+               rcu_read_lock();
+               if (READ_ONCE(page->mapping) != mapping ||
+                   !mapping->host) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+               inode = READ_ONCE(mapping->host);
+
+               /*
+                * Take a reference unless it is about to be freed. Previously
+                * this reference was taken by ihold under the page lock
+                * pinning the inode in place so i_lock was unnecessary. The
+                * only way for this check to fail is if the inode was
+                * truncated in parallel so warn for now if this happens.
+                *
+                * We are not calling into get_futex_key_refs() in file-backed
+                * cases, therefore a successful atomic_inc return below will
+                * guarantee that get_futex_key() will continue to imply MB (B).
+                */
+               if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+
+               /* Should be impossible but lets be paranoid for now */
+               BUG_ON(inode->i_mapping != mapping);

Hmm, do we want to transform this into an if and do rcu unlock and then just
call BUG()? I't doesn't matter at this point _anyway_, but it would be the right
thing to do, no?

+
                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-               key->shared.inode = mapping->host;
+               key->shared.inode = inode;
                key->shared.pgoff = basepage_index(page);
+               rcu_read_unlock();
        }

-       get_futex_key_refs(key); /* implies MB (B) */
-
out:
-       unlock_page(page);
        put_page(page);
        return err;
}

Reply via email to