From: Alex Tomas <[EMAIL PROTECTED]>

Signed-off-by: Aneesh Kumar K.V <[EMAIL PROTECTED]>
---
 fs/ext4/Makefile           |    2 +-
 fs/ext4/lg.c               |  576 ++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/super.c            |    5 +
 fs/fs-writeback.c          |    8 +-
 include/linux/ext4_fs.h    |   37 +++
 include/linux/ext4_fs_i.h  |    2 +
 include/linux/ext4_fs_sb.h |    6 +
 7 files changed, 630 insertions(+), 6 deletions(-)
 create mode 100644 fs/ext4/lg.c

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 7b24c73..f3d8ba7 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
 
 ext4dev-y      := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
                   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-                  ext4_jbd2.o writeback.o
+                  ext4_jbd2.o writeback.o lg.o
 
 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)     += xattr.o xattr_user.o xattr_trusted.o
 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/lg.c b/fs/ext4/lg.c
new file mode 100644
index 0000000..7fcdfe1
--- /dev/null
+++ b/fs/ext4/lg.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright (c) 2006, Cluster File Systems, Inc, [EMAIL PROTECTED]
+ * Written by Alex Tomas <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+/*
+ * locality groups
+ *
+ */
+
+/*
+ * TODO:
+ *  - too many tricks
+ *  - mmap'ed files support (we need to link them to some group)
+ *  - too silly grouping policy
+ *  - free non-used groups after some timeout
+ *  - anonymous group for non-regular inodes
+ *
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/ext4_jbd2.h>
+#include <linux/ext4_fs.h>
+#include <linux/ext4_fs_i.h>
+#include <linux/ext4_fs_sb.h>
+#include <linux/jbd.h>
+#include <linux/smp_lock.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/writeback.h>
+
+/*
+ * Atomic test-and-clear / test-and-set of the PG_checked page flag.
+ * Defined here only when the kernel headers do not already provide them.
+ * This file uses PG_checked to mark pages that have been counted in a
+ * locality group's dirty-page counters.
+ */
+#ifndef TestClearPageChecked
+#define TestClearPageChecked(page) \
+       test_and_clear_bit(PG_checked, &(page)->flags)
+#endif
+#ifndef TestSetPageChecked
+#define TestSetPageChecked(page) \
+       test_and_set_bit(PG_checked, &(page)->flags)
+#endif
+
+
+/* presumably the superblock of the block-device pseudo fs -- TODO confirm */
+extern struct super_block *blockdev_superblock;
+
+/* Returns non-zero when @sb is blockdev_superblock itself. */
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+       return sb == blockdev_superblock;
+}
+
+/* Made non-static in fs/fs-writeback.c by this patch. */
+extern int __writeback_single_inode(struct inode *,
+                                   struct writeback_control *);
+
+/*
+ * Look up the locality group of the current task on @sb.
+ *
+ * Walks sbi->s_locality_groups under rcu_read_lock() looking for a group
+ * whose lg_pgid equals current->signal->pgrp and which is not marked
+ * deleted.  On success the group's lg_count has been incremented and the
+ * group is returned; otherwise NULL is returned.
+ */
+struct ext4_locality_group *ext4_lg_find_group(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_locality_group *found = NULL;
+       struct list_head *pos;
+
+       rcu_read_lock();
+       list_for_each_rcu(pos, &sbi->s_locality_groups) {
+               struct ext4_locality_group *cand;
+               int alive;
+
+               cand = list_entry(pos, struct ext4_locality_group, lg_hash);
+               if (cand->lg_pgid != current->signal->pgrp)
+                       continue;
+
+               /* the deleted check and the reference grab go together */
+               spin_lock(&cand->lg_lock);
+               alive = (cand->lg_deleted == 0);
+               if (alive)
+                       atomic_inc(&cand->lg_count);
+               spin_unlock(&cand->lg_lock);
+
+               if (alive) {
+                       found = cand;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return found;
+}
+
+/*
+ * Drop one reference on @lg.
+ *
+ * This only decrements lg_count; the group is neither unhashed nor freed
+ * here, even if the count reaches zero (see the TODO list at the top of
+ * this file: "free non-used groups after some timeout").
+ */
+void ext4_lg_put_group(struct ext4_locality_group *lg)
+{
+       atomic_dec(&lg->lg_count);
+}
+
+/*
+ * Allocate and initialise a locality group for the current task.
+ *
+ * The group records current's process group and session ids and starts
+ * with lg_count == 1 (the reference handed to the caller).  It is NOT
+ * linked into sbi->s_locality_groups; the caller does that if needed.
+ * @sb is currently unused.
+ *
+ * Returns the new group, or NULL if the allocation failed.
+ */
+struct ext4_locality_group *ext4_lg_new_group(struct super_block *sb)
+{
+       struct ext4_locality_group *lg;
+
+       lg = kmalloc(sizeof(struct ext4_locality_group), GFP_NOFS);
+       if (lg == NULL)
+               return NULL;
+
+       /* kmalloc() does not zero memory: every field is set explicitly */
+       lg->lg_parent = 0;
+       lg->lg_pgid = current->signal->pgrp;
+       lg->lg_sid = current->signal->session;
+       INIT_LIST_HEAD(&lg->lg_hash);
+       spin_lock_init(&lg->lg_lock);
+       lg->lg_deleted = 0;
+       lg->lg_flags = 0;
+       atomic_set(&lg->lg_count, 1);
+       atomic_set(&lg->lg_inodes_nr, 0);
+       INIT_LIST_HEAD(&lg->lg_list);
+       INIT_LIST_HEAD(&lg->lg_inodes);
+       INIT_LIST_HEAD(&lg->lg_dirty);
+       INIT_LIST_HEAD(&lg->lg_io);
+       atomic_set(&lg->lg_dirty_pages, 0);
+       atomic_set(&lg->lg_nonallocated, 0);
+
+       return lg;
+}
+
+/*
+ * Attach @inode to locality group @lg.
+ *
+ * "nolock" refers to inode_lock: this function does not take it, both
+ * callers in this file already hold it.  The caller passes in one
+ * reference on @lg:
+ *  - if the inode has no group yet, the inode is linked into
+ *    lg->lg_inodes and keeps that reference;
+ *  - if the inode already belongs to a group, the passed reference is
+ *    dropped and the existing group is used instead.
+ *
+ * Returns the group the inode belongs to after the call.
+ */
+struct ext4_locality_group *
+ext4_lg_assign_to_group_nolock(struct inode *inode,
+                              struct ext4_locality_group *lg)
+{
+       struct ext4_inode_info *ei = EXT4_I(inode);
+
+       if (ei->i_locality_group == NULL) {
+               ei->i_locality_group = lg;
+               /*
+                * ext4_lg_inode_leave_group() removes entries from
+                * lg_inodes under lg_lock only, so the insertion must
+                * hold lg_lock too (nested inside inode_lock, the same
+                * order ext4_lg_sync_inodes() uses).
+                */
+               spin_lock(&lg->lg_lock);
+               list_add(&ei->i_lg_list, &lg->lg_inodes);
+               spin_unlock(&lg->lg_lock);
+               atomic_inc(&lg->lg_inodes_nr);
+       } else {
+               printk("somebody has already set lg %p (our %p) to inode %lu(%p)\n",
+                       ei->i_locality_group, lg, inode->i_ino, inode);
+               ext4_lg_put_group(lg);
+               lg = ei->i_locality_group;
+       }
+       return lg;
+}
+
+/*
+ * Locked wrapper around ext4_lg_assign_to_group_nolock(): takes
+ * inode_lock for the duration of the assignment.
+ *
+ * Returns the group the inode really belongs to.  This may differ from
+ * @lg when another thread assigned a group first; in that case the
+ * helper has already dropped the caller's reference on @lg, so @lg must
+ * not be handed back to the caller.
+ */
+struct ext4_locality_group *
+ext4_lg_assign_to_group(struct inode *inode, struct ext4_locality_group *lg)
+{
+       spin_lock(&inode_lock);
+       lg = ext4_lg_assign_to_group_nolock(inode, lg);
+       spin_unlock(&inode_lock);
+       return lg;
+}
+
+/*
+ * Find the current task's locality group on the inode's superblock,
+ * creating and hashing a new one if none exists, then attach @inode
+ * to it.
+ *
+ * The lookup is repeated under s_locality_lock before a freshly
+ * allocated group is hashed; if another thread hashed one meanwhile,
+ * the fresh group is freed and the existing one is used.
+ *
+ * Returns the value of ext4_lg_assign_to_group(), or NULL if a new
+ * group was needed and could not be allocated.
+ */
+struct ext4_locality_group *ext4_lg_find_or_allocate_group(struct inode *inode)
+{
+       struct super_block *sb = inode->i_sb;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_locality_group *group, *raced;
+
+       group = ext4_lg_find_group(sb);
+       if (group != NULL)
+               return ext4_lg_assign_to_group(inode, group);
+
+       group = ext4_lg_new_group(sb);
+       if (group == NULL)
+               return NULL;
+
+       spin_lock(&sbi->s_locality_lock);
+       raced = ext4_lg_find_group(sb);
+       if (raced != NULL) {
+               /* somebody else hashed a group first: use theirs */
+               kfree(group);
+               group = raced;
+       } else {
+               list_add_rcu(&group->lg_hash, &sbi->s_locality_groups);
+       }
+       spin_unlock(&sbi->s_locality_lock);
+
+       return ext4_lg_assign_to_group(inode, group);
+}
+
+/*
+ * Account a dirty page to the inode's locality group.
+ *
+ * Every dirty page should be counted exactly once: PG_checked is used
+ * as the "already counted" marker.  If the inode has no group yet, one
+ * is looked up or created; if that fails the page is left uncounted.
+ * @allocated == 0 additionally bumps the group's lg_nonallocated count.
+ */
+void ext4_lg_page_enter_inode(struct inode *inode,
+                               struct page *page, int allocated)
+{
+       struct ext4_locality_group *group = EXT4_I(inode)->i_locality_group;
+
+       if (group == NULL)
+               group = ext4_lg_find_or_allocate_group(inode);
+       if (group == NULL)
+               return;
+
+       /* page was already accounted */
+       if (TestSetPageChecked(page))
+               return;
+
+       atomic_inc(&group->lg_dirty_pages);
+       if (!allocated)
+               atomic_inc(&group->lg_nonallocated);
+}
+
+
+/*
+ * Undo the accounting done by ext4_lg_page_enter_inode() for @page.
+ *
+ * Only pages that carry PG_checked are un-counted; the flag is cleared
+ * in the process.  A regular file without a locality group is reported
+ * via printk and otherwise ignored.
+ */
+void ext4_lg_page_leave_inode(struct inode *inode,
+                               struct page *page, int allocated)
+{
+       struct ext4_locality_group *group = EXT4_I(inode)->i_locality_group;
+
+       if (group != NULL) {
+               if (TestClearPageChecked(page)) {
+                       atomic_dec(&group->lg_dirty_pages);
+                       if (!allocated)
+                               atomic_dec(&group->lg_nonallocated);
+               }
+               return;
+       }
+
+       if (S_ISREG(inode->i_mode))
+               printk("regular file %lu/%u with no locality group?!\n",
+                       inode->i_ino, inode->i_generation);
+}
+
+/*
+ * Detach @inode from its locality group (called from ext4_clear_inode()).
+ *
+ * A still-linked regular file must not have dirty-tagged pages left at
+ * this point.  The inode's group pointer is cleared under inode_lock;
+ * the inode is then unlinked from the group's lg_inodes list under
+ * lg_lock, and the inode count and the inode's group reference are
+ * dropped.
+ */
+void ext4_lg_inode_leave_group(struct inode *inode)
+{
+       struct ext4_inode_info *info = EXT4_I(inode);
+       struct ext4_locality_group *group;
+
+       BUG_ON(inode->i_nlink != 0 && S_ISREG(inode->i_mode) &&
+              mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY));
+
+       spin_lock(&inode_lock);
+       group = info->i_locality_group;
+       info->i_locality_group = NULL;
+       spin_unlock(&inode_lock);
+
+       if (group == NULL)
+               return;
+
+       spin_lock(&group->lg_lock);
+       list_del(&info->i_lg_list);
+       spin_unlock(&group->lg_lock);
+
+       atomic_dec(&group->lg_inodes_nr);
+       ext4_lg_put_group(group);
+}
+
+/* bit number in lg_flags, manipulated with set_bit()/clear_bit() */
+#define EXT4_LG_DIRTY                  0
+
+/* return values of ext4_lg_sync_single_group() */
+#define EXT4_CONTINUE_WRITEBACK                1
+#define EXT4_STOP_WRITEBACK            2
+
+/* printable names indexed by wbc->sync_mode; used by debug printks only */
+static char *__sync_modes[] = { "NONE", "ALL", "HOLD" };
+
+/*
+ * The function syncs a single group like generic_sync_sb_inodes() does.
+ *
+ * Inodes are taken from the tail of lg->lg_io (refilled from lg->lg_dirty)
+ * and written with __writeback_single_inode() until the list is empty or
+ * one of the early-exit conditions below triggers.  @start is the jiffies
+ * value sampled by the caller; an inode dirtied after it ends the pass.
+ *
+ * returns:
+ *    EXT4_CONTINUE_WRITEBACK - continue syncing with a next group
+ *    EXT4_STOP_WRITEBACK     - break syncing
+ *
+ * 'code' only records which exit path was taken, for the (disabled)
+ * debug printk at the end.
+ */
+int ext4_lg_sync_single_group(struct super_block *sb,
+                               struct ext4_locality_group *lg,
+                               struct writeback_control *wbc,
+                               unsigned long start)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       int nr_to_write = wbc->nr_to_write;
+       int dirty_pages, nonallocated;
+       int rc, code = 0;
+
+       /* snapshots for the debug printk only */
+       dirty_pages = atomic_read(&lg->lg_dirty_pages);
+       nonallocated = atomic_read(&lg->lg_nonallocated);
+
+       rc = EXT4_CONTINUE_WRITEBACK;
+
+       spin_lock(&inode_lock);
+
+       /* refill the io list unless kupdate still has leftovers to finish */
+       if (!wbc->for_kupdate || list_empty(&lg->lg_io))
+               list_splice_init(&lg->lg_dirty, &lg->lg_io);
+
+       while (!list_empty(&lg->lg_io)) {
+               struct inode *inode = list_entry(lg->lg_io.prev,
+                               struct inode, i_list);
+               struct address_space *mapping = inode->i_mapping;
+               struct backing_dev_info *bdi = mapping->backing_dev_info;
+               long pages_skipped;
+
+               if (wbc->nonblocking && bdi_write_congested(bdi)) {
+                       /* underlying device is congested
+                        * break all writeback immediately */
+                       wbc->encountered_congestion = 1;
+
+                       /* keep this inode on the head so that
+                        * we'll continue writeback with it
+                        * when we return to this locality group */
+
+                       /* same for the locality group */
+                       set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+                       list_move(&lg->lg_list, &sbi->s_locality_io);
+
+                       /* signal to the caller */
+                       rc = EXT4_STOP_WRITEBACK;
+                       code = 1;
+                       break;
+               }
+
+               if (wbc->bdi && bdi != wbc->bdi) {
+                       printk("wbc->bdi (%p) != bdi (%p)\n", wbc->bdi, bdi);
+                       list_move(&inode->i_list, &inode_in_use);
+                       rc = EXT4_CONTINUE_WRITEBACK;
+                       code = 2;
+                       break;
+               }
+
+               /* Was this inode dirtied after sync_sb_inodes was called? */
+               if (time_after(inode->dirtied_when, start)) {
+                       /* keep this inode on the head so that
+                        * we'll continue writeback with it
+                        * when we return to this locality group */
+
+                       /* continue with next locality group
+                        * move this one to the dirty tail */
+                       set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+                       list_move_tail(&lg->lg_list, &sbi->s_locality_dirty);
+
+                       rc = EXT4_CONTINUE_WRITEBACK;
+                       code = 3;
+                       break;
+               }
+
+               /* Was this inode dirtied too recently? */
+               if (wbc->older_than_this && time_after(inode->dirtied_when,
+                                       *wbc->older_than_this)) {
+                       /* keep this inode on the head so that
+                        * we'll continue writeback with it
+                        * when we return to this locality group */
+
+                       /* continue with next locality group
+                        * move this one to the dirty tail */
+                       set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+                       list_move_tail(&lg->lg_list, &sbi->s_locality_dirty);
+
+                       rc = EXT4_CONTINUE_WRITEBACK;
+                       code = 4;
+                       break;
+               }
+
+               /* Is another pdflush already flushing this queue? */
+               if (current_is_pdflush() && !writeback_acquire(bdi)) {
+                       /* keep this inode on the head so that
+                        * we'll continue writeback with it
+                        * when we return to this locality group */
+
+                       /* same for the locality group */
+                       list_move(&lg->lg_list, &sbi->s_locality_io);
+
+                       rc = EXT4_STOP_WRITEBACK;
+                       code = 5;
+                       break;
+               }
+
+               BUG_ON(inode->i_state & I_FREEING);
+               /* pin the inode: inode_lock is dropped before iput() below */
+               __iget(inode);
+               pages_skipped = wbc->pages_skipped;
+               __writeback_single_inode(inode, wbc);
+               if (wbc->sync_mode == WB_SYNC_HOLD) {
+                       inode->dirtied_when = jiffies;
+                       list_move(&inode->i_list, &lg->lg_dirty);
+                       set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+                       list_move(&lg->lg_list, &sbi->s_locality_dirty);
+               }
+               if (current_is_pdflush())
+                       writeback_release(bdi);
+               if (wbc->pages_skipped != pages_skipped) {
+                       /*
+                        * writeback is not making progress due to locked
+                        * buffers.  Skip this inode for now.
+                        */
+                       list_move(&inode->i_list, &lg->lg_dirty);
+
+                       set_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+                       list_move(&lg->lg_list, &sbi->s_locality_dirty);
+               }
+               spin_unlock(&inode_lock);
+               iput(inode);
+               cond_resched();
+               spin_lock(&inode_lock);
+               /*
+                * NOTE(review): on this exit the group is not re-queued on
+                * s_locality_dirty/s_locality_io here (unless a branch above
+                * already did so), even if lg_io still holds inodes --
+                * confirm the leftovers are picked up by a later pass.
+                */
+               if (wbc->nr_to_write <= 0) {
+                       rc = EXT4_STOP_WRITEBACK;
+                       code = 6;
+                       break;
+               }
+       }
+
+       spin_unlock(&inode_lock);
+
+       /* debug output, compiled out by the constant 0 */
+       if (0 && nr_to_write - wbc->nr_to_write) {
+               printk("#%u: %s/%lu/%s%s%s%s%s%s M: %lu/%lu/%lu "
+                       "LG:%p/%u/%u[%u/%u] wrote %lu/%d\n",
+                       current->pid, __sync_modes[wbc->sync_mode],
+                       wbc->nr_to_write,
+                       wbc->nonblocking ? "N" : "",
+                       wbc->encountered_congestion ? "C" : "",
+                       wbc->for_kupdate ? "U" : "",
+                       wbc->for_reclaim ? "R" : "",
+                       wbc->for_writepages ? "W" : "",
+                       wbc->range_cyclic ? "I" : "",
+                       global_page_state(NR_FILE_DIRTY),
+                       global_page_state(NR_UNSTABLE_NFS),
+                       global_page_state(NR_WRITEBACK),
+                       lg, atomic_read(&lg->lg_count), lg->lg_pgid,
+                       dirty_pages, nonallocated,
+                       nr_to_write - wbc->nr_to_write, code);
+       }
+
+       return rc;
+}
+
+/*
+ * the core of inode syncer:
+ *  - loop over locality groups
+ *  - maintain them in order to avoid starvation
+ *
+ * Groups are taken from the tail of sbi->s_locality_io (refilled from
+ * sbi->s_locality_dirty), unlinked, and handed one at a time to
+ * ext4_lg_sync_single_group() with inode_lock dropped.  The loop ends
+ * when the list is empty or a group asks to stop writeback.
+ */
+void ext4_lg_sync_groups(struct super_block *sb, struct writeback_control *wbc)
+{
+       const unsigned long start = jiffies;    /* livelock avoidance */
+       struct ext4_locality_group *lg = NULL, *prev = NULL;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       int rc;
+
+       spin_lock(&inode_lock);
+
+       /*printk("#%u: mode %s, nr2wr %lu, %s%s%s%s%s%s M: %lu/%lu/%lu "
+                       "LGs: %sdirty %sio\n", current->pid,
+                       __sync_modes[wbc->sync_mode], wbc->nr_to_write,
+                       wbc->nonblocking ? "nonblock " : "",
+                       wbc->encountered_congestion ? "congested " : "",
+                       wbc->for_kupdate ? "kupdate " : "",
+                       wbc->for_reclaim ? "reclaim " : "",
+                       wbc->for_writepages ? "writepages " : "",
+                       wbc->range_cyclic ? "cyclic " : "",
+                       global_page_state(NR_FILE_DIRTY),
+                       global_page_state(NR_UNSTABLE_NFS),
+                       global_page_state(NR_WRITEBACK),
+                       list_empty(&sbi->s_locality_dirty) ? "-" : "+",
+                       list_empty(&sbi->s_locality_io) ? "-" : "+");*/
+
+       /* refill the io list unless kupdate still has leftovers to finish */
+       if (!wbc->for_kupdate || list_empty(&sbi->s_locality_io))
+               list_splice_init(&sbi->s_locality_dirty, &sbi->s_locality_io);
+
+       while (!list_empty(&sbi->s_locality_io)) {
+
+               /* we should not handle the same group twice in a row;
+                * at this point 'lg' is the group of the previous
+                * iteration and 'prev' the one before it */
+               WARN_ON(prev && prev == lg);
+               prev = lg;
+
+               lg = list_entry(sbi->s_locality_io.prev,
+                               struct ext4_locality_group, lg_list);
+
+               /* protect locality group */
+               atomic_inc(&lg->lg_count);
+
+               /* to avoid two concurrent threads flushing same group */
+               list_del_init(&lg->lg_list);
+
+               spin_unlock(&inode_lock);
+
+               /* the group re-sets the bit itself if it needs another pass */
+               clear_bit(EXT4_LG_DIRTY, &lg->lg_flags);
+               rc = ext4_lg_sync_single_group(sb, lg, wbc, start);
+
+               spin_lock(&inode_lock);
+               ext4_lg_put_group(lg);
+
+               if (rc == EXT4_STOP_WRITEBACK)
+                       break;
+       }
+       spin_unlock(&inode_lock);
+}
+
+/*
+ * Entry function for inode syncing (installed as ->sync_inodes).
+ * Its responsibility is to sort all inodes on sb->s_dirty into their
+ * locality groups' dirty lists, queue those groups on
+ * sbi->s_locality_dirty, and then run ext4_lg_sync_groups().
+ *
+ * Inodes without a group are handled specially:
+ *  - directories and zero-size inodes are moved straight to
+ *    inode_in_use / inode_unused;
+ *  - anything else is attached to the anonymous group.
+ *
+ * NOTE(review): sbi->s_locality_anon is dereferenced without a NULL
+ * check; it may be NULL if the allocation in ext4_lg_init() failed --
+ * confirm.
+ */
+void ext4_lg_sync_inodes(struct super_block *sb, struct writeback_control *wbc)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_locality_group *lg;
+
+       /* refill pending groups from s_dirty */
+       spin_lock(&inode_lock);
+       while (!list_empty(&sb->s_dirty)) {
+               struct inode *inode = list_entry(sb->s_dirty.prev,
+                                               struct inode, i_list);
+               struct ext4_inode_info *ei = EXT4_I(inode);
+
+               lg = ei->i_locality_group;
+               if (lg == NULL) {
+                       if (S_ISDIR(inode->i_mode) || i_size_read(inode) == 0) {
+                               if (atomic_read(&inode->i_count)) {
+                                       /*
+                                        * The inode is clean, inuse
+                                        */
+                                       list_move(&inode->i_list,
+                                                 &inode_in_use);
+                               } else {
+                                       /*
+                                        * The inode is clean, unused
+                                        */
+                                       list_move(&inode->i_list,
+                                                 &inode_unused);
+                               }
+                               continue;
+                       }
+                       /* XXX: atime changed ? or mmap?
+                        * anyway, assign the inode to anonymous group */
+                       lg = sbi->s_locality_anon;
+                       /* reference consumed by the assignment below */
+                       atomic_inc(&lg->lg_count);
+                       lg = ext4_lg_assign_to_group_nolock(inode, lg);
+               }
+
+               /* move inode in proper locality group's dirty list */
+               spin_lock(&lg->lg_lock);
+               list_move_tail(&inode->i_list, &lg->lg_dirty);
+               spin_unlock(&lg->lg_lock);
+
+               /* queue the group only on its clean -> dirty transition */
+               if (!test_and_set_bit(EXT4_LG_DIRTY, &lg->lg_flags))
+                       list_move(&lg->lg_list, &sbi->s_locality_dirty);
+       }
+       spin_unlock(&inode_lock);
+
+       ext4_lg_sync_groups(sb, wbc);
+}
+
+/*
+ * Set up the per-superblock locality-group state: the lock, the three
+ * group lists and the anonymous group used for inodes that have no
+ * group of their own.
+ *
+ * The function cannot report failure; if the anonymous group cannot be
+ * allocated, s_locality_anon is set to NULL and a warning is logged.
+ */
+void ext4_lg_init(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       /*
+        * The same raw bit is tested by writeback_inodes() in
+        * fs/fs-writeback.c so that this superblock is visited even when
+        * s_dirty and s_io are empty.
+        * NOTE(review): 2048 looks like it collides with an existing MS_*
+        * mount flag -- confirm and replace with a dedicated flag.
+        */
+       sb->s_flags |= 2048; /* XXX: i'll fix this, i promise */
+       spin_lock_init(&sbi->s_locality_lock);
+       INIT_LIST_HEAD(&sbi->s_locality_groups);
+       INIT_LIST_HEAD(&sbi->s_locality_dirty);
+       INIT_LIST_HEAD(&sbi->s_locality_io);
+
+       /* assign unconditionally so the pointer is never left stale */
+       sbi->s_locality_anon = ext4_lg_new_group(sb);
+       if (sbi->s_locality_anon == NULL)
+               printk(KERN_WARNING "EXT4-fs: can't allocate anonymous "
+                       "locality group\n");
+}
+
+/*
+ * Tear down all locality groups of @sb (called from ext4_put_super()).
+ *
+ * Every hashed group is unlinked and freed; a group that still has a
+ * non-zero reference count is reported first.  The anonymous group is
+ * freed last; it is reported when its count exceeds the initial
+ * reference taken at creation.
+ */
+void ext4_lg_release(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_locality_group *lg;
+       struct list_head *cur, *tmp;
+
+       list_for_each_safe_rcu(cur, tmp, &sbi->s_locality_groups) {
+               lg = list_entry(cur, struct ext4_locality_group, lg_hash);
+               if (atomic_read(&lg->lg_count))
+                       /* argument order must follow the format string:
+                        * count, pgid, inodes, dirty, non-allocated */
+                       printk("LG %p/%d (pgid %u), %u inodes, dirty %d, "
+                               "non-allocated %d\n",
+                               lg, atomic_read(&lg->lg_count),
+                               lg->lg_pgid, atomic_read(&lg->lg_inodes_nr),
+                               atomic_read(&lg->lg_dirty_pages),
+                               atomic_read(&lg->lg_nonallocated));
+               list_del(&lg->lg_hash);
+               kfree(lg);
+       }
+       lg = sbi->s_locality_anon;
+       if (lg) {
+               if (atomic_read(&lg->lg_count) > 1)
+                       printk("LG anon/%d, %u inodes, dirty %d, "
+                               "non-allocated %d\n",
+                               atomic_read(&lg->lg_count),
+                               atomic_read(&lg->lg_inodes_nr),
+                               atomic_read(&lg->lg_dirty_pages),
+                               atomic_read(&lg->lg_nonallocated));
+               kfree(lg);
+               /* do not leave a dangling pointer behind */
+               sbi->s_locality_anon = NULL;
+       }
+}
+
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5bd2762..efc9270 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -452,6 +452,7 @@ static void ext4_put_super (struct super_block * sb)
                mark_buffer_dirty(sbi->s_sbh);
                ext4_commit_super(sb, es, 1);
        }
+       ext4_lg_release(sb);
 
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
@@ -501,6 +502,7 @@ static struct inode *ext4_alloc_inode(struct super_block 
*sb)
        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
+       ei->i_locality_group = NULL;
 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
        ei->i_acl = EXT4_ACL_NOT_CACHED;
        ei->i_default_acl = EXT4_ACL_NOT_CACHED;
@@ -571,6 +573,7 @@ static void ext4_clear_inode(struct inode *inode)
        EXT4_I(inode)->i_block_alloc_info = NULL;
        if (unlikely(rsv))
                kfree(rsv);
+       ext4_lg_inode_leave_group(inode);
 }
 
 static inline void ext4_show_quota_options(struct seq_file *seq, struct 
super_block *sb)
@@ -713,6 +716,7 @@ static const struct super_operations ext4_sops = {
        .remount_fs     = ext4_remount,
        .clear_inode    = ext4_clear_inode,
        .show_options   = ext4_show_options,
+       .sync_inodes    = ext4_lg_sync_inodes,
 #ifdef CONFIG_QUOTA
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
@@ -1960,6 +1964,7 @@ static int ext4_fill_super (struct super_block *sb, void 
*data, int silent)
                test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
                "writeback");
 
+       ext4_lg_init(sb);
        ext4_ext_init(sb);
        ext4_reserve_init(sb);
        ext4_wb_init(sb);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index cdcff8c..7806778 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -149,8 +149,7 @@ static int write_inode(struct inode *inode, int sync)
  *
  * Called under inode_lock.
  */
-static int
-__sync_single_inode(struct inode *inode, struct writeback_control *wbc)
+int __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
        unsigned dirty;
        struct address_space *mapping = inode->i_mapping;
@@ -240,8 +239,7 @@ __sync_single_inode(struct inode *inode, struct 
writeback_control *wbc)
  * caller has ref on the inode (either via __iget or via syscall against an fd)
  * or the inode has I_WILL_FREE set (via generic_forget_inode)
  */
-static int
-__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
+int __writeback_single_inode(struct inode *inode, struct writeback_control 
*wbc)
 {
        wait_queue_head_t *wqh;
 
@@ -440,7 +438,7 @@ writeback_inodes(struct writeback_control *wbc)
 restart:
        sb = sb_entry(super_blocks.prev);
        for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-               if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io)) {
+               if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io) || 
(sb->s_flags & 2048)) {
                        /* we're making our own get_super here */
                        sb->s_count++;
                        spin_unlock(&sb_lock);
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 138fcbc..cd477e2 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -824,6 +824,34 @@ struct dx_hash_info
 
 
 /*
+ * Locality group:
+ *   we try to group all related changes together
+ *   so that writeback can flush/allocate them together as well
+ */
+struct ext4_locality_group {
+       int                     lg_parent;      /* not referenced in lg.c */
+       int                     lg_pgid;        /* creator's current->signal->pgrp; lookup key */
+       int                     lg_sid;         /* creator's current->signal->session */
+       struct list_head        lg_hash;        /* link in sbi->s_locality_groups */
+       spinlock_t              lg_lock;
+       int                     lg_deleted;     /* non-zero groups are skipped by lookup */
+       atomic_t                lg_count;       /* reference count */
+       atomic_t                lg_inodes_nr;   /* number of inodes on lg_inodes */
+
+       /* writeback queueing state */
+       unsigned long           lg_flags;       /* EXT4_LG_DIRTY bit */
+       struct list_head        lg_list;        /* link in s_locality_dirty / s_locality_io */
+
+       /* inode lists for the group */
+       struct list_head        lg_inodes;      /* inodes in the group */
+       struct list_head        lg_dirty;       /* dirty inodes from s_dirty */
+       struct list_head        lg_io;          /* inodes scheduled for flush */
+
+       atomic_t                lg_dirty_pages; /* pages to write */
+       atomic_t                lg_nonallocated;/* non-allocated pages */
+};
+
+/*
  * Describe an inode's exact location on disk and in memory
  */
 struct ext4_iloc
@@ -881,6 +909,15 @@ void ext4_get_group_no_and_offset(struct super_block *sb, 
ext4_fsblk_t blocknr,
 # define ATTRIB_NORET  __attribute__((noreturn))
 # define NORET_AND     noreturn,
 
+/* lg.c */
+extern void ext4_lg_init(struct super_block *sb);
+extern void ext4_lg_release(struct super_block *sb);
+extern void ext4_lg_inode_leave_group(struct inode *inode);
+extern void ext4_lg_page_enter_inode(struct inode *inode, struct page *page, int allocated);
+extern void ext4_lg_page_leave_inode(struct inode *inode, struct page *page, int allocated);
+extern void ext4_lg_sync_inodes(struct super_block *, struct writeback_control *);
+
+
 /* balloc.c */
 extern unsigned int ext4_block_group(struct super_block *sb,
                        ext4_fsblk_t blocknr);
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index 9dea1f7..6d9f9db 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -150,6 +150,8 @@ struct ext4_inode_info {
         */
        struct mutex truncate_mutex;
        struct inode vfs_inode;
+       struct list_head i_lg_list;
+       struct ext4_locality_group *i_locality_group;
 
        unsigned long i_ext_generation;
        struct ext4_ext_cache i_cached_extent;
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 9768b32..08b0645 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -86,6 +86,12 @@ struct ext4_sb_info {
 #endif
        unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
 
+       struct ext4_locality_group *s_locality_anon;
+       struct list_head s_locality_dirty;
+       struct list_head s_locality_io;
+       struct list_head s_locality_groups;
+       spinlock_t s_locality_lock;
+
 #ifdef EXTENTS_STATS
        /* ext4 extents stats */
        unsigned long s_ext_min;
-- 
1.5.3.rc0.30.g114fd-dirty

-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to