From: Miklos Szeredi <[email protected]>

We check submounts before doing d_drop() on a non-empty directory dentry in
NFS (have_submounts()), but we do not exclude a racing mount.

 Process A: have_submounts() -> returns false
 Process B: mount() -> success
 Process A: d_drop()

This patch prepares the ground for the fix by doing the following
operations all under the same rename lock:

  have_submounts()
  shrink_dcache_parent()
  d_drop()

This is actually an optimization since have_submounts() and
shrink_dcache_parent() both traverse the same dentry tree separately.

Signed-off-by: Miklos Szeredi <[email protected]>
CC: David Howells <[email protected]>
CC: Steven Whitehouse <[email protected]>
CC: Trond Myklebust <[email protected]>
CC: Greg Kroah-Hartman <[email protected]>
---
 fs/dcache.c            | 157 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |   1 +
 2 files changed, 158 insertions(+)

diff --git a/fs/dcache.c b/fs/dcache.c
index 87bdb53..020004d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1224,6 +1224,163 @@ void shrink_dcache_parent(struct dentry * parent)
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
+static int __check_submounts_and_drop(struct dentry *parent,
+                                            struct list_head *dispose)
+{
+       struct dentry *this_parent;
+       struct list_head *next;
+       unsigned seq;
+       int found = 0;
+       int locked = 0;
+
+       seq = read_seqbegin(&rename_lock);
+again:
+       this_parent = parent;
+       spin_lock(&this_parent->d_lock);
+repeat:
+       next = this_parent->d_subdirs.next;
+resume:
+       while (next != &this_parent->d_subdirs) {
+               struct list_head *tmp = next;
+               struct dentry *dentry;
+
+               dentry = list_entry(tmp, struct dentry, d_u.d_child);
+               next = tmp->next;
+
+               spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+               if (d_mountpoint(dentry)) {
+                       spin_unlock(&dentry->d_lock);
+                       found = -EBUSY;
+                       goto out;
+               }
+
+               /*
+                * move only zero ref count dentries to the dispose list.
+                *
+                * Those which are presently on the shrink list, being processed
+                * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
+                * loop in shrink_dcache_parent() might not make any progress
+                * and loop forever.
+                */
+               if (dentry->d_count) {
+                       dentry_lru_del(dentry);
+               } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+                       dentry_lru_move_list(dentry, dispose);
+                       dentry->d_flags |= DCACHE_SHRINK_LIST;
+                       found++;
+               }
+               /*
+                * We can return to the caller if we have found some (this
+                * ensures forward progress). We'll be coming back to find
+                * the rest.
+                */
+               if (found && need_resched()) {
+                       spin_unlock(&dentry->d_lock);
+                       goto out;
+               }
+
+               /*
+                * Descend a level if the d_subdirs list is non-empty.
+                */
+               if (!list_empty(&dentry->d_subdirs)) {
+                       spin_unlock(&this_parent->d_lock);
+                       spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+                       this_parent = dentry;
+                       spin_acquire(&this_parent->d_lock.dep_map, 0, 1, 
_RET_IP_);
+                       goto repeat;
+               }
+
+               spin_unlock(&dentry->d_lock);
+       }
+       /*
+        * All done at this level ... ascend and resume the search.
+        */
+       if (this_parent != parent) {
+               struct dentry *child = this_parent;
+               this_parent = try_to_ascend(this_parent, locked, seq);
+               if (!this_parent)
+                       goto rename_retry;
+               next = child->d_u.d_child.next;
+               goto resume;
+       }
+       if (!locked && read_seqretry(&rename_lock, seq)) {
+               spin_unlock(&this_parent->d_lock);
+               goto rename_retry;
+       }
+       if (d_mountpoint(this_parent))
+               found = -EBUSY;
+       if (!found)
+               __d_drop(this_parent);
+out:
+       spin_unlock(&this_parent->d_lock);
+
+       if (locked)
+               write_sequnlock(&rename_lock);
+       return found;
+
+rename_retry:
+       if (found)
+               return found;
+       if (locked)
+               goto again;
+       locked = 1;
+       write_seqlock(&rename_lock);
+       goto again;
+}
+
+/**
+ * check_submounts_and_drop - prune dcache, check for submounts and drop
+ *
+ * All done as a single atomic operation relative to has_unlinked_ancestor().
+ * Returns 0 if successfully unhashed @parent.  If there were submounts then
+ * return -EBUSY.
+ *
+ * @dentry: dentry to prune and drop
+ */
+int check_submounts_and_drop(struct dentry *dentry)
+{
+       int ret = 0;
+
+       /* Negative dentries can be dropped without further checks */
+       if (!dentry->d_inode) {
+               d_drop(dentry);
+               goto out;
+       }
+
+       spin_lock(&dentry->d_lock);
+       if (d_unhashed(dentry))
+               goto out_unlock;
+       if (list_empty(&dentry->d_subdirs)) {
+               if (d_mountpoint(dentry)) {
+                       ret = -EBUSY;
+                       goto out_unlock;
+               }
+               __d_drop(dentry);
+               goto out_unlock;
+       }
+       spin_unlock(&dentry->d_lock);
+
+       for (;;) {
+               LIST_HEAD(dispose);
+               ret = __check_submounts_and_drop(dentry, &dispose);
+               if (!list_empty(&dispose))
+                       shrink_dentry_list(&dispose);
+
+               if (ret <= 0)
+                       break;
+
+               cond_resched();
+       }
+
+out:
+       return ret;
+
+out_unlock:
+       spin_unlock(&dentry->d_lock);
+       goto out;
+}
+EXPORT_SYMBOL(check_submounts_and_drop);
+
 /**
  * __d_alloc   -       allocate a dcache entry
  * @sb: filesystem it will belong to
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index b90337c..41b21ca 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -251,6 +251,7 @@ extern void d_prune_aliases(struct inode *);
 
 /* test whether we have any submounts in a subdir tree */
 extern int have_submounts(struct dentry *);
+extern int check_submounts_and_drop(struct dentry *);
 
 /*
  * This adds the entry to the hash queues.
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to