From: Dave Chinner <[email protected]>

Convert VFS internal superblock inode iterators that cannot use
referenced inodes to the new super_iter_inodes_unsafe() iterator.
Dquot and inode eviction cannot safely take references to the
inodes they visit, so they need this unreferenced iteration. The
nr_blockdev_pages() statistics code needs it as well, as it is
called from si_meminfo() and so can potentially be run from
locations where arbitrary blocking is not allowed or desirable.

New cases using this iterator need careful consideration.

Signed-off-by: Dave Chinner <[email protected]>
---
 block/bdev.c     | 24 +++++++++++----
 fs/inode.c       | 79 ++++++++++++++++++++++++++----------------------
 fs/quota/dquot.c | 72 ++++++++++++++++++++++++-------------------
 3 files changed, 102 insertions(+), 73 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 33f9c4605e3a..b5a362156ca1 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -472,16 +472,28 @@ void bdev_drop(struct block_device *bdev)
        iput(BD_INODE(bdev));
 }
 
+static int bdev_pages_count(struct inode *inode, void *data)
+{
+       long    *pages = data;
+
+       *pages += inode->i_mapping->nrpages;
+       return INO_ITER_DONE;
+}
+
 long nr_blockdev_pages(void)
 {
-       struct inode *inode;
        long ret = 0;
 
-       spin_lock(&blockdev_superblock->s_inode_list_lock);
-       list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
-               ret += inode->i_mapping->nrpages;
-       spin_unlock(&blockdev_superblock->s_inode_list_lock);
-
+       /*
+        * We can be called from contexts where blocking is not
+        * desirable. The count is advisory at best, and we only
+        * need to access the inode mapping. Hence as long as we
+        * have an inode existence guarantee, we can safely count
+        * the cached pages on each inode without needing reference
+        * counted inodes.
+        */
+       super_iter_inodes_unsafe(blockdev_superblock,
+                       bdev_pages_count, &ret);
        return ret;
 }
 
diff --git a/fs/inode.c b/fs/inode.c
index 0a53d8c34203..3f335f78c5b2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -761,8 +761,11 @@ static void evict(struct inode *inode)
  * Dispose-list gets a local list with local inodes in it, so it doesn't
  * need to worry about list corruption and SMP locks.
  */
-static void dispose_list(struct list_head *head)
+static bool dispose_list(struct list_head *head)
 {
+       if (list_empty(head))
+               return false;
+
        while (!list_empty(head)) {
                struct inode *inode;
 
@@ -772,6 +775,7 @@ static void dispose_list(struct list_head *head)
                evict(inode);
                cond_resched();
        }
+       return true;
 }
 
 /**
@@ -783,47 +787,50 @@ static void dispose_list(struct list_head *head)
  * so any inode reaching zero refcount during or after that call will
  * be immediately evicted.
  */
+static int evict_inode_fn(struct inode *inode, void *data)
+{
+       struct list_head *dispose = data;
+
+       spin_lock(&inode->i_lock);
+       if (atomic_read(&inode->i_count) ||
+           (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))) {
+               spin_unlock(&inode->i_lock);
+               return INO_ITER_DONE;
+       }
+
+       inode->i_state |= I_FREEING;
+       inode_lru_list_del(inode);
+       spin_unlock(&inode->i_lock);
+       list_add(&inode->i_lru, dispose);
+
+       /*
+        * If we've run long enough to need rescheduling, abort the
+        * iteration so we can return to evict_inodes() and dispose of the
+        * inodes before collecting more inodes to evict.
+        */
+       if (need_resched())
+               return INO_ITER_ABORT;
+       return INO_ITER_DONE;
+}
+
 void evict_inodes(struct super_block *sb)
 {
-       struct inode *inode, *next;
        LIST_HEAD(dispose);
 
-again:
-       spin_lock(&sb->s_inode_list_lock);
-       list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
-               if (atomic_read(&inode->i_count))
-                       continue;
-
-               spin_lock(&inode->i_lock);
-               if (atomic_read(&inode->i_count)) {
-                       spin_unlock(&inode->i_lock);
-                       continue;
-               }
-               if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
-                       spin_unlock(&inode->i_lock);
-                       continue;
-               }
-
-               inode->i_state |= I_FREEING;
-               inode_lru_list_del(inode);
-               spin_unlock(&inode->i_lock);
-               list_add(&inode->i_lru, &dispose);
-
+       do {
                /*
-                * We can have a ton of inodes to evict at unmount time given
-                * enough memory, check to see if we need to go to sleep for a
-                * bit so we don't livelock.
+                * We do not want to take references to inodes whilst iterating
+                * because we are trying to evict unreferenced inodes from
+                * the cache. Hence we need to use the unsafe iteration
+                * mechanism and do all the required inode validity checks in
+                * evict_inode_fn() to safely queue unreferenced inodes for
+                * eviction.
+                *
+                * We repeat the iteration until it doesn't find any more
+                * inodes to dispose of.
                 */
-               if (need_resched()) {
-                       spin_unlock(&sb->s_inode_list_lock);
-                       cond_resched();
-                       dispose_list(&dispose);
-                       goto again;
-               }
-       }
-       spin_unlock(&sb->s_inode_list_lock);
-
-       dispose_list(&dispose);
+               super_iter_inodes_unsafe(sb, evict_inode_fn, &dispose);
+       } while (dispose_list(&dispose));
 }
 EXPORT_SYMBOL_GPL(evict_inodes);
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b40410cd39af..ea0bd807fed7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1075,41 +1075,51 @@ static int add_dquot_ref(struct super_block *sb, int type)
        return err;
 }
 
+struct dquot_ref_data {
+       int     type;
+       int     reserved;
+};
+
+static int remove_dquot_ref_fn(struct inode *inode, void *data)
+{
+       struct dquot_ref_data *ref = data;
+
+       spin_lock(&dq_data_lock);
+       if (!IS_NOQUOTA(inode)) {
+               struct dquot __rcu **dquots = i_dquot(inode);
+               struct dquot *dquot = srcu_dereference_check(
+                       dquots[ref->type], &dquot_srcu,
+                       lockdep_is_held(&dq_data_lock));
+
+#ifdef CONFIG_QUOTA_DEBUG
+               if (unlikely(inode_get_rsv_space(inode) > 0))
+                       ref->reserved++;
+#endif
+               rcu_assign_pointer(dquots[ref->type], NULL);
+               if (dquot)
+                       dqput(dquot);
+       }
+       spin_unlock(&dq_data_lock);
+       return INO_ITER_DONE;
+}
+
 static void remove_dquot_ref(struct super_block *sb, int type)
 {
-       struct inode *inode;
-#ifdef CONFIG_QUOTA_DEBUG
-       int reserved = 0;
-#endif
-
-       spin_lock(&sb->s_inode_list_lock);
-       list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-               /*
-                *  We have to scan also I_NEW inodes because they can already
-                *  have quota pointer initialized. Luckily, we need to touch
-                *  only quota pointers and these have separate locking
-                *  (dq_data_lock).
-                */
-               spin_lock(&dq_data_lock);
-               if (!IS_NOQUOTA(inode)) {
-                       struct dquot __rcu **dquots = i_dquot(inode);
-                       struct dquot *dquot = srcu_dereference_check(
-                               dquots[type], &dquot_srcu,
-                               lockdep_is_held(&dq_data_lock));
+       struct dquot_ref_data ref = {
+               .type = type,
+       };
 
+       /*
+        * We have to scan I_NEW inodes because they can already
+        * have quota pointer initialized. Luckily, we need to touch
+        * only quota pointers and these have separate locking
+        * (dq_data_lock) so the existence guarantee that
+        * super_iter_inodes_unsafe() provides for inodes passed to
+        * remove_dquot_ref_fn() is sufficient for this operation.
+        */
+       super_iter_inodes_unsafe(sb, remove_dquot_ref_fn, &ref);
 #ifdef CONFIG_QUOTA_DEBUG
-                       if (unlikely(inode_get_rsv_space(inode) > 0))
-                               reserved = 1;
-#endif
-                       rcu_assign_pointer(dquots[type], NULL);
-                       if (dquot)
-                               dqput(dquot);
-               }
-               spin_unlock(&dq_data_lock);
-       }
-       spin_unlock(&sb->s_inode_list_lock);
-#ifdef CONFIG_QUOTA_DEBUG
-       if (reserved) {
+       if (ref.reserved) {
                printk(KERN_WARNING "VFS (%s): Writes happened after quota"
                        " was disabled thus quota information is probably "
                        "inconsistent. Please run quotacheck(8).\n", sb->s_id);
-- 
2.45.2


Reply via email to