Here are the changes to the series I have accumulated so far.  Mainly:

o add truncate_inode_pages_final(), which calls mapping_set_exiting()
  and uses ordered but unlocked nrshadows & nrpages reads to skip the
  tree lock acquisition and IRQ disabling on empty page cache trees.

o revert all efforts to make the lru_lock IRQ-safe just to silence
  lockdep.  Also solves the problem of the list_lru_init() key API.

o in the shadow shrinker, drop the lru_lock after mapping->tree_lock
  has been acquired.  The latter pins the inode by preventing the
  final truncate from removing shadow entries, so we can safely
  release the lru lock once mapping->tree_lock is acquired and the
  node is taken off the list.

o changed radix_tree_node member names and documented them better

o fixed typos

As we agreed to keep the shadow node lru management non-lazy for now,
we don't need to worry about the lifetime of radix tree nodes in the
shrinker beyond taking them off the lru list with the lru_lock and
mapping->tree_lock held.  No complicated RCU scheme required.

---

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f089058..fc0de70 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -295,9 +295,9 @@ in the beginning of ->setattr unconditionally.
        ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should
 be used instead.  It gets called whenever the inode is evicted, whether it has
 remaining links or not.  Caller does *not* evict the pagecache or inode-associated
-metadata buffers; getting rid of those is responsibility of method, as it had
-been for ->delete_inode(). Caller makes sure async writeback cannot be running
-for the inode while (or after) ->evict_inode() is called.
+metadata buffers; the method has to use truncate_inode_pages_final() to get rid
+of those. Caller makes sure async writeback cannot be running for the inode while
+(or after) ->evict_inode() is called.
 
        ->drop_inode() returns int now; it's called on final iput() with
 inode->i_lock held and it returns true if filesystems wants the inode to be
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index b868c2b..79cbc9c 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -1817,7 +1817,7 @@ void ll_delete_inode(struct inode *inode)
                cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
                                   CL_FSYNC_DISCARD, 1);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        /* Workaround for LU-118 */
        if (inode->i_data.nrpages) {
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 94de6d1..e6716c2 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -444,7 +444,7 @@ void v9fs_evict_inode(struct inode *inode)
 {
        struct v9fs_inode *v9inode = V9FS_I(inode);
 
-       truncate_inode_pages(inode->i_mapping, 0);
+       truncate_inode_pages_final(inode->i_mapping);
        clear_inode(inode);
        filemap_fdatawrite(inode->i_mapping);
 
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 0e092d0..96df91e 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -259,7 +259,7 @@ affs_evict_inode(struct inode *inode)
 {
        unsigned long cache_page;
        pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        if (!inode->i_nlink) {
                inode->i_size = 0;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 789bc25..2bbe60e 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -422,7 +422,7 @@ void afs_evict_inode(struct inode *inode)
 
        ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
 
        afs_give_up_callback(vnode);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8defc6b..29aa5cf 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -172,7 +172,7 @@ static void bfs_evict_inode(struct inode *inode)
 
        dprintf("ino=%08lx\n", ino);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        invalidate_inode_buffers(inode);
        clear_inode(inode);
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 391ffe5..c7a7def 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -419,7 +419,7 @@ static void bdev_evict_inode(struct inode *inode)
 {
        struct block_device *bdev = &BDEV_I(inode)->bdev;
        struct list_head *p;
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        invalidate_inode_buffers(inode); /* is it needed here? */
        clear_inode(inode);
        spin_lock(&bdev_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 51e3afa..d3e4983 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4471,7 +4471,7 @@ void btrfs_evict_inode(struct inode *inode)
 
        trace_btrfs_inode_evict(inode);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
                               btrfs_is_free_space_inode(inode)))
                goto no_delete;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 77fc5e1..d795c50 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -286,7 +286,7 @@ cifs_destroy_inode(struct inode *inode)
 static void
 cifs_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        cifs_fscache_release_inode_cookie(inode);
 }
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 4dcc0d8..43a5b38 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -250,7 +250,7 @@ static void coda_put_super(struct super_block *sb)
 
 static void coda_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        coda_cache_clear_inode(inode);
 }
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index e879cf8..afa1b81 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -132,7 +132,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  */
 static void ecryptfs_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        iput(ecryptfs_inode_to_lower(inode));
 }
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a52a5d2..d9ff4d3 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1479,7 +1479,7 @@ void exofs_evict_inode(struct inode *inode)
        struct ore_io_state *ios;
        int ret;
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        /* TODO: should do better here */
        if (inode->i_nlink || is_bad_inode(inode))
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c260de6..115fa58 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -78,7 +78,7 @@ void ext2_evict_inode(struct inode * inode)
                dquot_drop(inode);
        }
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        if (want_delete) {
                sb_start_intwrite(inode->i_sb);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2bd8548..153f4be 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -228,7 +228,7 @@ void ext3_evict_inode (struct inode *inode)
                log_wait_commit(journal, commit_tid);
                filemap_write_and_wait(&inode->i_data);
        }
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        ext3_discard_reservation(inode);
        rsv = ei->i_block_alloc_info;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e274e9c..3b75e70 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -214,7 +214,7 @@ void ext4_evict_inode(struct inode *inode)
                        jbd2_complete_transaction(journal, commit_tid);
                        filemap_write_and_wait(&inode->i_data);
                }
-               truncate_inode_pages(&inode->i_data, 0);
+               truncate_inode_pages_final(&inode->i_data);
 
                WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
                goto no_delete;
@@ -225,7 +225,7 @@ void ext4_evict_inode(struct inode *inode)
 
        if (ext4_should_order_data(inode))
                ext4_begin_ordered_truncate(inode, 0);
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
        if (is_bad_inode(inode))
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9339cd2..0bd44f8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -246,7 +246,7 @@ void f2fs_evict_inode(struct inode *inode)
        int ilock;
 
        trace_f2fs_evict_inode(inode);
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        if (inode->i_ino == F2FS_NODE_INO(sbi) ||
                        inode->i_ino == F2FS_META_INO(sbi))
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0062da2..fe802d8 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(fat_build_inode);
 
 static void fat_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        if (!inode->i_nlink) {
                inode->i_size = 0;
                fat_truncate_blocks(inode, 0);
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index f47df72..363e3ae 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -354,7 +354,7 @@ static void vxfs_i_callback(struct rcu_head *head)
 void
 vxfs_evict_inode(struct inode *ip)
 {
-       truncate_inode_pages(&ip->i_data, 0);
+       truncate_inode_pages_final(&ip->i_data);
        clear_inode(ip);
        call_rcu(&ip->i_rcu, vxfs_i_callback);
 }
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index a8ce6da..09d7fa0 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -123,7 +123,7 @@ static void fuse_destroy_inode(struct inode *inode)
 
 static void fuse_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (inode->i_sb->s_flags & MS_ACTIVE) {
                struct fuse_conn *fc = get_fuse_conn(inode);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index e5639de..ac96a99 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1525,7 +1525,7 @@ out_unlock:
                fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
 out:
        /* Case 3 starts here */
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        gfs2_rs_delete(ip);
        gfs2_ordered_del_inode(ip);
        clear_inode(inode);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 380ab31..9e2fecd 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -547,7 +547,7 @@ out:
 
 void hfs_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
                HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4c4d142..b9436d9 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -161,7 +161,7 @@ static int hfsplus_write_inode(struct inode *inode,
 static void hfsplus_evict_inode(struct inode *inode)
 {
        hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (HFSPLUS_IS_RSRC(inode)) {
                HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2543728..0c9f640 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -239,7 +239,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 
 static void hostfs_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (HOSTFS_I(inode)->fd != -1) {
                close_file(&HOSTFS_I(inode)->fd);
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 9edeeb0..50a4273 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -304,7 +304,7 @@ void hpfs_write_if_changed(struct inode *inode)
 
 void hpfs_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (!inode->i_nlink) {
                hpfs_lock(inode->i_sb);
diff --git a/fs/inode.c b/fs/inode.c
index 7858fb7..093864e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -546,26 +546,10 @@ static void evict(struct inode *inode)
         */
        inode_wait_for_writeback(inode);
 
-       /*
-        * Page reclaim can not do iput() and thus can race with the
-        * inode teardown.  Tell it when the address space is exiting,
-        * so that it does not install eviction information after the
-        * final truncate has begun.
-        *
-        * As truncation uses a lockless tree lookup, acquire the
-        * spinlock to make sure any ongoing tree modification that
-        * does not see AS_EXITING is completed before starting the
-        * final truncate.
-        */
-       spin_lock_irq(&inode->i_data.tree_lock);
-       mapping_set_exiting(&inode->i_data);
-       spin_unlock_irq(&inode->i_data.tree_lock);
-
        if (op->evict_inode) {
                op->evict_inode(inode);
        } else {
-               if (inode->i_data.nrpages || inode->i_data.nrshadows)
-                       truncate_inode_pages(&inode->i_data, 0);
+               truncate_inode_pages_final(&inode->i_data);
                clear_inode(inode);
        }
        if (S_ISBLK(inode->i_mode) && inode->i_bdev)
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index fe3c052..00ed6c6 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -241,7 +241,7 @@ void jffs2_evict_inode (struct inode *inode)
 
        jffs2_dbg(1, "%s(): ino #%lu mode %o\n",
                  __func__, inode->i_ino, inode->i_mode);
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        jffs2_do_clear_inode(c, f);
 }
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index f4aab71..6f8fe72 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -154,7 +154,7 @@ void jfs_evict_inode(struct inode *inode)
                dquot_initialize(inode);
 
                if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
-                       truncate_inode_pages(&inode->i_data, 0);
+                       truncate_inode_pages_final(&inode->i_data);
 
                        if (test_cflag(COMMIT_Freewmap, inode))
                                jfs_free_zero_link(inode);
@@ -168,7 +168,7 @@ void jfs_evict_inode(struct inode *inode)
                        dquot_free_inode(inode);
                }
        } else {
-               truncate_inode_pages(&inode->i_data, 0);
+               truncate_inode_pages_final(&inode->i_data);
        }
        clear_inode(inode);
        dquot_drop(inode);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9a59cba..4814031 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2180,7 +2180,7 @@ void logfs_evict_inode(struct inode *inode)
                        do_delete_inode(inode);
                }
        }
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
 
        /* Cheaper version of write_inode.  All changes are concealed in
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 0332109..03aaeb1 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -26,7 +26,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data);
 
 static void minix_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        if (!inode->i_nlink) {
                inode->i_size = 0;
                minix_truncate(inode);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 4659da6..e728061 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -296,7 +296,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
 static void
 ncp_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
 
        if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index eda8879..fbc38a6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -128,7 +128,7 @@ EXPORT_SYMBOL_GPL(nfs_clear_inode);
 
 void nfs_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        nfs_clear_inode(inode);
 }
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index e26acdd..f2a5c44 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -98,7 +98,7 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
  */
 static void nfs4_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        pnfs_return_layout(inode);
        pnfs_destroy_layout(NFS_I(inode));
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 42fcbe3..b9c5726 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -783,16 +783,14 @@ void nilfs_evict_inode(struct inode *inode)
        int ret;
 
        if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
-               if (inode->i_data.nrpages || inode->i_data.nrshadows)
-                       truncate_inode_pages(&inode->i_data, 0);
+               truncate_inode_pages_final(&inode->i_data);
                clear_inode(inode);
                nilfs_clear_inode(inode);
                return;
        }
        nilfs_transaction_begin(sb, &ti, 0); /* never fails */
 
-       if (inode->i_data.nrpages || inode->i_data.nrshadows)
-               truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        /* TODO: some of the following operations may fail.  */
        nilfs_truncate_bmap(ii, 0);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 2778b02..bd50adc1 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2259,7 +2259,7 @@ void ntfs_evict_big_inode(struct inode *vi)
 {
        ntfs_inode *ni = NTFS_I(vi);
 
-       truncate_inode_pages(&vi->i_data, 0);
+       truncate_inode_pages_final(&vi->i_data);
        clear_inode(vi);
 
 #ifdef NTFS_RW
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index f87f9bd..f1c46a7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -951,7 +951,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
                (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
        if (sync_data)
                filemap_write_and_wait(inode->i_mapping);
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 }
 
 static void ocfs2_delete_inode(struct inode *inode)
@@ -1167,7 +1167,7 @@ void ocfs2_evict_inode(struct inode *inode)
            (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
                ocfs2_delete_inode(inode);
        } else {
-               truncate_inode_pages(&inode->i_data, 0);
+               truncate_inode_pages_final(&inode->i_data);
        }
        ocfs2_clear_inode(inode);
 }
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d8b0afd..ec58c76 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -183,7 +183,7 @@ int omfs_sync_inode(struct inode *inode)
  */
 static void omfs_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
 
        if (inode->i_nlink)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8eaa1ba..9ca0f08 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -35,7 +35,7 @@ static void proc_evict_inode(struct inode *inode)
        const struct proc_ns_operations *ns_ops;
        void *ns;
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
 
        /* Stop tracking associated processes */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ad62bdbb..bc8b800 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -35,7 +35,7 @@ void reiserfs_evict_inode(struct inode *inode)
        if (!inode->i_nlink && !is_bad_inode(inode))
                dquot_initialize(inode);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        if (inode->i_nlink)
                goto no_delete;
 
diff --git a/fs/super.c b/fs/super.c
index a958d52..0225c20 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -196,9 +196,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                INIT_HLIST_BL_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
 
-               if (list_lru_init(&s->s_dentry_lru, NULL))
+               if (list_lru_init(&s->s_dentry_lru))
                        goto err_out;
-               if (list_lru_init(&s->s_inode_lru, NULL))
+               if (list_lru_init(&s->s_inode_lru))
                        goto err_out_dentry_lru;
 
                INIT_LIST_HEAD(&s->s_mounts);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 963f910..bd0dd8d 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -309,7 +309,7 @@ void sysfs_evict_inode(struct inode *inode)
 {
        struct sysfs_dirent *sd  = inode->i_private;
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        sysfs_put(sd);
 }
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c327d4e..5625ca9 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -295,7 +295,7 @@ int sysv_sync_inode(struct inode *inode)
 
 static void sysv_evict_inode(struct inode *inode)
 {
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        if (!inode->i_nlink) {
                inode->i_size = 0;
                sysv_truncate(inode);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3e4aa72..b9ac1f3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -351,7 +351,7 @@ static void ubifs_evict_inode(struct inode *inode)
        dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
        ubifs_assert(!atomic_read(&inode->i_count));
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
 
        if (inode->i_nlink)
                goto done;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 062b792..af6f4c3 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -146,8 +146,8 @@ void udf_evict_inode(struct inode *inode)
                want_delete = 1;
                udf_setsize(inode, 0);
                udf_update_inode(inode, IS_SYNC(inode));
-       } else
-               truncate_inode_pages(&inode->i_data, 0);
+       }
+       truncate_inode_pages_final(&inode->i_data);
        invalidate_inode_buffers(inode);
        clear_inode(inode);
        if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index c8ca960..61e8a9b 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -885,7 +885,7 @@ void ufs_evict_inode(struct inode * inode)
        if (!inode->i_nlink && !is_bad_inode(inode))
                want_delete = 1;
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        if (want_delete) {
                loff_t old_i_size;
                /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c49cbce..2634700 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1670,7 +1670,7 @@ xfs_alloc_buftarg(
        if (xfs_setsize_buftarg_early(btp, bdev))
                goto error;
 
-       if (list_lru_init(&btp->bt_lru, NULL))
+       if (list_lru_init(&btp->bt_lru))
                goto error;
 
        btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 57d6aa9..3e6c2e6 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -831,7 +831,7 @@ xfs_qm_init_quotainfo(
 
        qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
 
-       if ((error = list_lru_init(&qinf->qi_lru, NULL))) {
+       if ((error = list_lru_init(&qinf->qi_lru))) {
                kmem_free(qinf);
                mp->m_quotainfo = NULL;
                return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 15188cc..47ce25d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1006,7 +1006,7 @@ xfs_fs_evict_inode(
 
        trace_xfs_evict_inode(ip);
 
-       truncate_inode_pages(&inode->i_data, 0);
+       truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        XFS_STATS_INC(vn_rele);
        XFS_STATS_INC(vn_remove);
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index b970a45..3ce5417 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -32,7 +32,7 @@ struct list_lru {
 };
 
 void list_lru_destroy(struct list_lru *lru);
-int list_lru_init(struct list_lru *lru, struct lock_class_key *key);
+int list_lru_init(struct list_lru *lru);
 
 /**
  * list_lru_add: add an element to the lru list's tail
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c09ef3a..5449e7a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1588,6 +1588,7 @@ vm_unmapped_area(struct vm_unmapped_area_info *info)
 extern void truncate_inode_pages(struct address_space *, loff_t);
 extern void truncate_inode_pages_range(struct address_space *,
                                       loff_t lstart, loff_t lend);
+extern void truncate_inode_pages_final(struct address_space *);
 
 /* generic vm_area_ops exported for stackable file systems */
 extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 29df11f..33170db 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -88,15 +88,17 @@ struct radix_tree_node {
        unsigned int    path;   /* Offset in parent & height from the bottom */
        unsigned int    count;
        union {
-               /* Used when ascending tree */
                struct {
+                       /* Used when ascending tree */
                        struct radix_tree_node *parent;
-                       void *private;
+                       /* For tree user */
+                       void *private_data;
                };
                /* Used when freeing node */
                struct rcu_head rcu_head;
        };
-       struct list_head lru;
+       /* For tree user */
+       struct list_head private_list;
        void __rcu      *slots[RADIX_TREE_MAP_SIZE];
        unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1865cd2..0a08953 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1425,7 +1425,7 @@ radix_tree_node_ctor(void *arg)
        struct radix_tree_node *node = arg;
 
        memset(node, 0, sizeof(*node));
-       INIT_LIST_HEAD(&node->lru);
+       INIT_LIST_HEAD(&node->private_list);
 }
 
 static __init unsigned long __maxindex(unsigned int height)
diff --git a/mm/filemap.c b/mm/filemap.c
index 79a7546..b93e223 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -120,8 +120,17 @@ static void page_cache_tree_delete(struct address_space *mapping,
 
        __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
 
-       if (shadow)
+       if (shadow) {
                mapping->nrshadows++;
+               /*
+                * Make sure the nrshadows update is committed before
+                * the nrpages update so that final truncate racing
+                * with reclaim does not see both counters 0 at the
+                * same time and miss a shadow entry.
+                */
+               smp_wmb();
+       }
+       mapping->nrpages--;
 
        if (!node) {
                /* Clear direct pointer tags in root node */
@@ -148,9 +157,10 @@ static void page_cache_tree_delete(struct address_space 
*mapping,
                        return;
 
        /* Only shadow entries in there, keep track of this node */
-       if (!(node->count & RADIX_TREE_COUNT_MASK) && list_empty(&node->lru)) {
-               node->private = mapping;
-               list_lru_add(&workingset_shadow_nodes, &node->lru);
+       if (!(node->count & RADIX_TREE_COUNT_MASK) &&
+           list_empty(&node->private_list)) {
+               node->private_data = mapping;
+               list_lru_add(&workingset_shadow_nodes, &node->private_list);
        }
 }
 
@@ -178,7 +188,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 
        page->mapping = NULL;
        /* Leave page->index set: truncation lookup relies upon it */
-       mapping->nrpages--;
+
        __dec_zone_page_state(page, NR_FILE_PAGES);
        if (PageSwapBacked(page))
                __dec_zone_page_state(page, NR_SHMEM);
@@ -518,11 +528,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
                        node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
        }
        radix_tree_replace_slot(slot, page);
+       mapping->nrpages++;
        if (node) {
                node->count++;
                /* Installed page, can't be shadow-only anymore */
-               if (!list_empty(&node->lru))
-                       list_lru_del(&workingset_shadow_nodes, &node->lru);
+               if (!list_empty(&node->private_list))
+                       list_lru_del(&workingset_shadow_nodes,
+                                    &node->private_list);
        }
        return 0;
 }
@@ -557,7 +569,6 @@ static int __add_to_page_cache_locked(struct page *page,
        radix_tree_preload_end();
        if (unlikely(error))
                goto err_insert;
-       mapping->nrpages++;
        __inc_zone_page_state(page, NR_FILE_PAGES);
        spin_unlock_irq(&mapping->tree_lock);
        trace_mm_filemap_add_to_page_cache(page);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index c357e8f..72f9dec 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -114,7 +114,7 @@ restart:
 }
 EXPORT_SYMBOL_GPL(list_lru_walk_node);
 
-int list_lru_init(struct list_lru *lru, struct lock_class_key *key)
+int list_lru_init(struct list_lru *lru)
 {
        int i;
        size_t size = sizeof(*lru->node) * nr_node_ids;
@@ -126,8 +126,6 @@ int list_lru_init(struct list_lru *lru, struct lock_class_key *key)
        nodes_clear(lru->active_nodes);
        for (i = 0; i < nr_node_ids; i++) {
                spin_lock_init(&lru->node[i].lock);
-               if (key)
-                       lockdep_set_class(&lru->node[i].lock, key);
                INIT_LIST_HEAD(&lru->node[i].list);
                lru->node[i].nr_items = 0;
        }
diff --git a/mm/truncate.c b/mm/truncate.c
index 9cf5f88..5c2615d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -48,8 +48,9 @@ static void clear_exceptional_entry(struct address_space *mapping,
                goto unlock;
        node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
        /* No more shadow entries, stop tracking the node */
-       if (!(node->count >> RADIX_TREE_COUNT_SHIFT) && !list_empty(&node->lru))
-               list_lru_del(&workingset_shadow_nodes, &node->lru);
+       if (!(node->count >> RADIX_TREE_COUNT_SHIFT) &&
+           !list_empty(&node->private_list))
+               list_lru_del(&workingset_shadow_nodes, &node->private_list);
        __radix_tree_delete_node(&mapping->page_tree, node);
 unlock:
        spin_unlock_irq(&mapping->tree_lock);
@@ -407,6 +408,53 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 EXPORT_SYMBOL(truncate_inode_pages);
 
 /**
+ * truncate_inode_pages_final - truncate *all* pages before inode dies
+ * @mapping: mapping to truncate
+ *
+ * Called under (and serialized by) inode->i_mutex.
+ *
+ * Filesystems have to use this in the .evict_inode path to inform the
+ * VM that this is the final truncate and the inode is going away.
+ */
+void truncate_inode_pages_final(struct address_space *mapping)
+{
+       unsigned long nrshadows;
+       unsigned long nrpages;
+
+       /*
+        * Page reclaim can not participate in regular inode lifetime
+        * management (can't call iput()) and thus can race with the
+        * inode teardown.  Tell it when the address space is exiting,
+        * so that it does not install eviction information after the
+        * final truncate has begun.
+        */
+       mapping_set_exiting(mapping);
+
+       /*
+        * When reclaim installs eviction entries, it increases
+        * nrshadows first, then decreases nrpages.  Make sure we see
+        * this in the right order or we might miss an entry.
+        */
+       nrpages = mapping->nrpages;
+       smp_rmb();
+       nrshadows = mapping->nrshadows;
+
+       if (nrpages || nrshadows) {
+               /*
+                * As truncation uses a lockless tree lookup, acquire
+                * the spinlock to make sure any ongoing tree
+                * modification that does not see AS_EXITING is
+                * completed before starting the final truncate.
+                */
+               spin_lock_irq(&mapping->tree_lock);
+               spin_unlock_irq(&mapping->tree_lock);
+
+               truncate_inode_pages(mapping, 0);
+       }
+}
+EXPORT_SYMBOL(truncate_inode_pages_final);
+
+/**
  * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
  * @mapping: the address_space which holds the pages to invalidate
  * @start: the offset 'from' which to invalidate
diff --git a/mm/workingset.c b/mm/workingset.c
index ba8f0dd..2c3b5ad 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -35,7 +35,7 @@
  *
  *             Access frequency and refault distance
  *
- * A workload is trashing when its pages are frequently used but they
+ * A workload is thrashing when its pages are frequently used but they
  * are evicted from the inactive list every time before another access
  * would have promoted them to the active list.
  *
@@ -62,7 +62,7 @@
  *
  * Approximating inactive page access frequency - Observations:
  *
- * 1. When a page is accesed for the first time, it is added to the
+ * 1. When a page is accessed for the first time, it is added to the
  *    head of the inactive list, slides every existing inactive page
  *    towards the tail by one slot, and pushes the current tail page
  *    out of memory.
@@ -259,9 +259,6 @@ void workingset_activation(struct page *page)
  * slightly higher threshold than regular shrinkers so we don't
  * discard the entries too eagerly - after all, during light memory
  * pressure is exactly when we need them.
- *
- * The list_lru lock nests inside the IRQ-safe mapping->tree_lock, so
- * we have to disable IRQs for any list_lru operation as well.
  */
 
 struct list_lru workingset_shadow_nodes;
@@ -269,47 +266,47 @@ struct list_lru workingset_shadow_nodes;
 static unsigned long count_shadow_nodes(struct shrinker *shrinker,
                                        struct shrink_control *sc)
 {
-       unsigned long count;
-
-       local_irq_disable();
-       count = list_lru_count_node(&workingset_shadow_nodes, sc->nid);
-       local_irq_enable();
-
-       return count;
+       return list_lru_count_node(&workingset_shadow_nodes, sc->nid);
 }
 
-#define NOIRQ_BATCH 32
-
 static enum lru_status shadow_lru_isolate(struct list_head *item,
                                          spinlock_t *lru_lock,
                                          void *arg)
 {
        struct address_space *mapping;
        struct radix_tree_node *node;
-       unsigned long *batch = arg;
        unsigned int i;
 
-       node = container_of(item, struct radix_tree_node, lru);
-       mapping = node->private;
+       /*
+        * Page cache insertions and deletions synchronously maintain
+        * the shadow node LRU under the mapping->tree_lock and the
+        * lru_lock.  Because the page cache tree is emptied before
+        * the inode can be destroyed, holding the lru_lock pins any
+        * address_space that has radix tree nodes on the LRU.
+        *
+        * We can then safely transition to the mapping->tree_lock to
+        * pin only the address_space of the particular node we want
+        * to reclaim, take the node off-LRU, and drop the lru_lock.
+        */
+
+       node = container_of(item, struct radix_tree_node, private_list);
+       mapping = node->private_data;
 
-       /* Don't disable IRQs for too long */
-       if (--(*batch) == 0) {
-               spin_unlock_irq(lru_lock);
-               *batch = NOIRQ_BATCH;
-               spin_lock_irq(lru_lock);
-               return LRU_RETRY;
+       /* Coming from the list, invert the lock order */
+       if (!spin_trylock_irq(&mapping->tree_lock)) {
+               spin_unlock(lru_lock);
+               goto out_retry;
        }
 
-       /* Coming from the list, inverse the lock order */
-       if (!spin_trylock(&mapping->tree_lock))
-               return LRU_SKIP;
-
        /*
         * The nodes should only contain one or more shadow entries,
         * no pages, so we expect to be able to remove them all and
         * delete and free the empty node afterwards.
         */
 
+       list_del_init(&node->private_list);
+       spin_unlock(lru_lock);
+
        BUG_ON(!node->count);
        BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
 
@@ -323,30 +320,24 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
                        mapping->nrshadows--;
                }
        }
-       list_del_init(&node->lru);
        BUG_ON(node->count);
        if (!__radix_tree_delete_node(&mapping->page_tree, node))
                BUG();
 
-       spin_unlock(&mapping->tree_lock);
+       spin_unlock_irq(&mapping->tree_lock);
 
        count_vm_event(WORKINGSET_NODES_RECLAIMED);
-
-       return LRU_REMOVED;
+out_retry:
+       cond_resched();
+       spin_lock(lru_lock);
+       return LRU_RETRY;
 }
 
 static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
                                       struct shrink_control *sc)
 {
-       unsigned long batch = NOIRQ_BATCH;
-       unsigned long freed;
-
-       local_irq_disable();
-       freed = list_lru_walk_node(&workingset_shadow_nodes, sc->nid,
-                                  shadow_lru_isolate, &batch, &sc->nr_to_scan);
-       local_irq_enable();
-
-       return freed;
+       return list_lru_walk_node(&workingset_shadow_nodes, sc->nid,
+                                 shadow_lru_isolate, NULL, &sc->nr_to_scan);
 }
 
 static struct shrinker workingset_shadow_shrinker = {
@@ -356,13 +347,11 @@ static struct shrinker workingset_shadow_shrinker = {
        .flags = SHRINKER_NUMA_AWARE,
 };
 
-static struct lock_class_key shadow_nodes_key;
-
 static int __init workingset_init(void)
 {
        int ret;
 
-       ret = list_lru_init(&workingset_shadow_nodes, &shadow_nodes_key);
+       ret = list_lru_init(&workingset_shadow_nodes);
        if (ret)
                goto err;
        ret = register_shrinker(&workingset_shadow_shrinker);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to