Please ignore the previous patch; it has an operator-precedence issue in
ext3_get_grp_metacluster().
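
For reference, the pitfall is the usual one of '+' binding more tightly than
'>>' in C. The earlier code is not reproduced here, so the snippet below is
only an illustrative sketch of that class of bug (with made-up variable
names), not the previous patch itself:

	/* Illustrative sketch only -- hypothetical names, not the old code. */
	ext3_grpblk_t blocks_per_group = EXT3_BLOCKS_PER_GROUP(sb);
	ext3_grpblk_t mc_start = blocks_per_group / 2;
	/* Buggy: parses as (mc_start + blocks_per_group) >> 7 */
	ext3_grpblk_t mc_end_bad  = mc_start + blocks_per_group >> 7;
	/* Intended: mc_start + (blocks_per_group / 128) */
	ext3_grpblk_t mc_end_good = mc_start + (blocks_per_group >> 7);

The corrected patch follows; try this instead: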

Signed-off-by: Abhishek Rai <[EMAIL PROTECTED]>

diff -uprdN linux-2.6.23mm1-clean/fs/ext3/balloc.c linux-2.6.23mm1-ext3mc/fs/ext3/balloc.c
--- linux-2.6.23mm1-clean/fs/ext3/balloc.c      2007-10-17 18:31:42.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/fs/ext3/balloc.c     2007-11-15 11:23:51.000000000 -0800
@@ -711,6 +711,7 @@ bitmap_search_next_usable_block(ext3_grp
        ext3_grpblk_t next;
        struct journal_head *jh = bh2jh(bh);

+       BUG_ON(start > maxblocks);
        while (start < maxblocks) {
                next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start);
                if (next >= maxblocks)
@@ -841,10 +842,12 @@ claim_block(spinlock_t *lock, ext3_grpbl
 static ext3_grpblk_t
 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
                        struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal,
-                       unsigned long *count, struct ext3_reserve_window 
*my_rsv)
+                       int use_metacluster, unsigned long *count,
+                       struct ext3_reserve_window *my_rsv)
 {
        ext3_fsblk_t group_first_block;
        ext3_grpblk_t start, end;
+       ext3_grpblk_t mc_start, mc_end, start2 = -1, end2 = -1;
        unsigned long num = 0;

        /* we do allocation within the reservation window if we have a window */
@@ -872,12 +875,48 @@ ext3_try_to_allocate(struct super_block
        }

        BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb));
+       /* start must have been set to grp_goal if one still exists. */
+       BUG_ON(grp_goal >= 0 && start != grp_goal);
+
+       if (test_opt(sb, METACLUSTER) && !use_metacluster) {
+               ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
+
+               /*
+                * If there is an overlap with metacluster range, adjust our
+                * range to remove overlap, splitting our range into two if
+                * needed.
+                */
+               if (mc_end > mc_start) {
+                       if (mc_start <= start)
+                               start = max_t(ext3_grpblk_t, start, mc_end);
+                       else if (mc_end >= end)
+                               end = min_t(ext3_grpblk_t, end, mc_start);
+                       else {
+                               start2 = mc_end;
+                               end2 = end;
+                               end = mc_start;
+                       }
+               }
+       }
+
+       if (start >= end)
+               goto fail_access;
+
+       if (grp_goal > 0)
+               grp_goal = start;

 repeat:
        if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) {
                grp_goal = find_next_usable_block(start, bitmap_bh, end);
-               if (grp_goal < 0)
+               if (grp_goal < 0) {
+                       if (start2 >= 0) {
+                               start = start2;
+                               end = end2;
+                               start2 = -1;
+                               goto repeat;
+                       }
                        goto fail_access;
+               }
                if (!my_rsv) {
                        int i;

@@ -898,8 +937,15 @@ repeat:
                 */
                start++;
                grp_goal++;
-               if (start >= end)
-                       goto fail_access;
+               if (start >= end) {
+                       if (start2 < 0)
+                               goto fail_access;
+
+                       start = start2;
+                       end = end2;
+                       start2 = -1;
+                       grp_goal = -1;
+               }
                goto repeat;
        }
        num++;
@@ -1084,6 +1130,7 @@ static int alloc_new_reservation(struct
        unsigned long size;
        int ret;
        spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+       ext3_grpblk_t mc_start, mc_end;

        group_first_block = ext3_group_first_block_no(sb, group);
        group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
@@ -1143,6 +1190,7 @@ static int alloc_new_reservation(struct
         * To make sure the reservation window has a free bit inside it, we
         * need to check the bitmap after we found a reservable window.
         */
+       ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
 retry:
        ret = find_next_reservable_window(search_head, my_rsv, sb,
                                                start_block, group_end_block);
@@ -1170,6 +1218,11 @@ retry:
                        my_rsv->rsv_start - group_first_block,
                        bitmap_bh, group_end_block - group_first_block + 1);

+       if (first_free_block >= mc_start && first_free_block < mc_end) {
+               start_block = mc_end;
+               goto next;
+       }
+
        if (first_free_block < 0) {
                /*
                 * no free block left on the bitmap, no point
@@ -1195,6 +1248,7 @@ retry:
         * start from where the free block is,
         * we also shift the list head to where we stopped last time
         */
+next:
        search_head = my_rsv;
        spin_lock(rsv_lock);
        goto retry;
@@ -1223,12 +1277,18 @@ static void try_to_extend_reservation(st
        struct ext3_reserve_window_node *next_rsv;
        struct rb_node *next;
        spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+       ext3_grpblk_t mc_start, mc_end;

        if (!spin_trylock(rsv_lock))
                return;

        next = rb_next(&my_rsv->rsv_node);

+       ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
+
+       if (my_rsv->rsv_end >= mc_start && my_rsv->rsv_end < mc_end)
+               size += mc_end - 1 - my_rsv->rsv_end;
+
        if (!next)
                my_rsv->rsv_end += size;
        else {
@@ -1274,7 +1334,7 @@ static void try_to_extend_reservation(st
 static ext3_grpblk_t
 ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
                        unsigned int group, struct buffer_head *bitmap_bh,
-                       ext3_grpblk_t grp_goal,
+                       ext3_grpblk_t grp_goal, int use_metacluster,
                        struct ext3_reserve_window_node * my_rsv,
                        unsigned long *count, int *errp)
 {
@@ -1305,7 +1365,8 @@ ext3_try_to_allocate_with_rsv(struct sup
         */
        if (my_rsv == NULL ) {
                ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
-                                               grp_goal, count, NULL);
+                                               grp_goal, use_metacluster,
+                                               count, NULL);
                goto out;
        }
        /*
@@ -1361,7 +1422,8 @@ ext3_try_to_allocate_with_rsv(struct sup
                        BUG();
                }
                ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
-                                          grp_goal, &num, &my_rsv->rsv_window);
+                                               grp_goal, use_metacluster,
+                                               &num, &my_rsv->rsv_window);
                if (ret >= 0) {
                        my_rsv->rsv_alloc_hit += num;
                        *count = num;
@@ -1455,6 +1517,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
        int bgi;                        /* blockgroup iteration index */
        int fatal = 0, err;
        int performed_allocation = 0;
+       int use_metacluster = 0;
        ext3_grpblk_t free_blocks;      /* number of free blocks in a group */
        struct super_block *sb;
        struct ext3_group_desc *gdp;
@@ -1473,6 +1536,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
        sb = inode->i_sb;
        if (!sb) {
                printk("ext3_new_block: nonexistent device");
+               *errp = -ENODEV;
                return 0;
        }

@@ -1487,6 +1551,11 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
        sbi = EXT3_SB(sb);
        es = EXT3_SB(sb)->s_es;
        ext3_debug("goal=%lu.\n", goal);
+
+       /* Caller should ensure this. */
+       BUG_ON(goal < le32_to_cpu(es->s_first_data_block) ||
+              goal >= le32_to_cpu(es->s_blocks_count));
+
        /*
         * Allocate a block from reservation only when
         * filesystem is mounted with reservation(default,-o reservation), and
@@ -1507,9 +1576,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *h
        /*
         * First, test whether the goal block is free.
         */
-       if (goal < le32_to_cpu(es->s_first_data_block) ||
-           goal >= le32_to_cpu(es->s_blocks_count))
-               goal = le32_to_cpu(es->s_first_data_block);
        group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
                        EXT3_BLOCKS_PER_GROUP(sb);
        goal_group = group_no;
@@ -1535,7 +1601,7 @@ retry_alloc:
                        goto io_error;
                grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
                                        group_no, bitmap_bh, grp_target_blk,
-                                       my_rsv, &num, &fatal);
+                                       use_metacluster, my_rsv, &num, &fatal);
                if (fatal)
                        goto out;
                if (grp_alloc_blk >= 0)
@@ -1573,8 +1639,8 @@ retry_alloc:
                 * try to allocate block(s) from this group, without a goal(-1).
                 */
                grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
-                                       group_no, bitmap_bh, -1, my_rsv,
-                                       &num, &fatal);
+                                       group_no, bitmap_bh, -1,
+                                       use_metacluster, my_rsv, &num, &fatal);
                if (fatal)
                        goto out;
                if (grp_alloc_blk >= 0)
@@ -1593,6 +1659,10 @@ retry_alloc:
                group_no = goal_group;
                goto retry_alloc;
        }
+       if (test_opt(sb, METACLUSTER) && use_metacluster == 0) {
+               use_metacluster = 1;
+               goto retry_alloc;
+       }
        /* No space left on the device */
        *errp = -ENOSPC;
        goto out;
@@ -1713,6 +1783,161 @@ ext3_fsblk_t ext3_new_block(handle_t *ha
        return ext3_new_blocks(handle, inode, goal, &count, errp);
 }

+/*
+ * ext3_new_indirect_blocks() -- allocate indirect blocks for inode.
+ * @inode:             file inode
+ * @count:             target number of indirect blocks to allocate
+ * @new_blocks[]:       used for returning block numbers allocated
+ *
+ * return: 0 on success, appropriate error code otherwise. Upon return, *count
+ * contains the number of blocks successfully allocated which is non-zero only
+ * in the success case.
+ *
+ * Allocate a maximum of *count indirect blocks from the indirect block metadata
+ * area of the inode's group and store the block numbers in new_blocks[]. Since
+ * the allocation is in a predetermined region of the block group, caller just
+ * needs to pass a group number here which is where the goal and/or the
+ * reservation window may fall.
+ */
+int ext3_new_indirect_blocks(handle_t *handle, struct inode *inode,
+                       unsigned long group_no, unsigned long *count,
+                       ext3_fsblk_t new_blocks[])
+{
+       struct super_block *sb;
+       struct ext3_sb_info *sbi;
+       struct buffer_head *bitmap_bh = NULL;
+       struct buffer_head *gdp_bh;
+       struct ext3_group_desc *gdp;
+       ext3_grpblk_t group_first_block;      /* first block in the group */
+       ext3_grpblk_t free_blocks;      /* number of free blocks in the group */
+       ext3_grpblk_t mc_start, mc_end;
+       int blk, done = 0;
+       int err = 0;
+
+       BUG_ON(*count > 3);
+
+       sb = inode->i_sb;
+       if (!sb) {
+               printk(KERN_INFO "ext3_new_indirect_blocks: "
+                       "nonexistent device");
+               return -ENODEV;
+       }
+       BUG_ON(!test_opt(sb, METACLUSTER));
+       sbi = EXT3_SB(sb);
+
+       if (DQUOT_ALLOC_BLOCK(inode, *count))
+               return -EDQUOT;
+
+       if (!ext3_has_free_blocks(sbi)) {
+               err = -ENOSPC;
+               goto out;
+       }
+
+       gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+       if (!gdp) {
+               err = -EIO;
+               goto out;
+       }
+
+       free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+       if (free_blocks == 0) {
+               err = -ENOSPC;
+               goto out;
+       }
+
+       bitmap_bh = read_block_bitmap(sb, group_no);
+       if (!bitmap_bh) {
+               err = -EIO;
+               goto out;
+       }
+
+       /*
+        * Make sure we use undo access for the bitmap, because it is critical
+        * that we do the frozen_data COW on bitmap buffers in all cases even
+        * if the buffer is in BJ_Forget state in the committing transaction.
+        */
+       BUFFER_TRACE(bitmap_bh, "get undo access for new indirect block");
+       err = ext3_journal_get_undo_access(handle, bitmap_bh);
+       if (err)
+               goto out;
+
+       err = -ENOSPC;
+       group_first_block = ext3_group_first_block_no(sb, group_no);
+       ext3_get_grp_metacluster(sb, &mc_start, &mc_end);
+       blk = mc_start;
+
+       while (done < *count && blk < mc_end) {
+               if (!ext3_test_allocatable(blk, bitmap_bh)) {
+                       /*
+                        * Don't use find_next_usable_block() here as it may
+                        * skip free blocks that are not close to the goal.
+                        * Since our goal is always fixed (mc_start), we may
+                        * be trying to allocate slightly far from it and that
+                        * will be a problem.
+                        */
+                       blk = bitmap_search_next_usable_block(blk, bitmap_bh,
+                                                               mc_end);
+                       continue;
+               }
+               if (claim_block(sb_bgl_lock(sbi, group_no), blk,
+                               bitmap_bh)) {
+                       new_blocks[done++] = group_first_block + blk;
+               } else {
+                       /*
+                        * The block was allocated by another thread, or it
+                        * was allocated and then freed by another thread
+                        */
+                       cpu_relax();
+               }
+               blk++;
+       }
+
+       if (!done) {
+               BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
+               ext3_journal_release_buffer(handle, bitmap_bh);
+               goto out;
+       }
+
+       BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
+       err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+       if (err)
+               goto out;
+
+       BUFFER_TRACE(gdp_bh, "get_write_access");
+       err = ext3_journal_get_write_access(handle, gdp_bh);
+       if (err)
+               goto out;
+
+       /*
+        * Caller is responsible for adding the new indirect block buffers
+        * to the journal list.
+        */
+
+       spin_lock(sb_bgl_lock(sbi, group_no));
+       gdp->bg_free_blocks_count =
+               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - done);
+       spin_unlock(sb_bgl_lock(sbi, group_no));
+       percpu_counter_sub(&sbi->s_freeblocks_counter, done);
+
+       BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
+       err = ext3_journal_dirty_metadata(handle, gdp_bh);
+       sb->s_dirt = 1;
+       if (err)
+               goto out;
+
+out:
+       if (bitmap_bh)
+               brelse(bitmap_bh);
+
+       DQUOT_FREE_BLOCK(inode, *count - done);
+       *count = done;
+
+       if (err && err != -ENOSPC)
+               ext3_error(sb, "ext3_new_indirect_blocks", "error %d", err);
+
+       return err;
+}
+
 /**
  * ext3_count_free_blocks() -- count filesystem free blocks
  * @sb:                superblock
diff -uprdN linux-2.6.23mm1-clean/fs/ext3/inode.c linux-2.6.23mm1-ext3mc/fs/ext3/inode.c
--- linux-2.6.23mm1-clean/fs/ext3/inode.c       2007-10-17 18:31:42.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/fs/ext3/inode.c      2007-11-15 11:21:07.000000000 -0800
@@ -39,7 +39,29 @@
 #include "xattr.h"
 #include "acl.h"

+typedef struct {
+       __le32  *p;
+       __le32  key;
+       struct buffer_head *bh;
+} Indirect;
+
+struct ext3_ind_read_info {
+       int                     count;
+       int                     seq_prefetch;
+       long                    size;
+       struct buffer_head      *bh[0];
+};
+
+# define EXT3_IND_READ_INFO_SIZE(_c)        \
+       (sizeof(struct ext3_ind_read_info) + \
+        sizeof(struct buffer_head *) * (_c))
+
+# define EXT3_IND_READ_MAX             (32)
+
 static int ext3_writepage_trans_blocks(struct inode *inode);
+static Indirect *ext3_read_indblocks(struct inode *inode, int iblock,
+                                       int depth, int offsets[4],
+                                       Indirect chain[4], int *err);

 /*
  * Test whether an inode is a fast symlink.
@@ -233,12 +255,6 @@ no_delete:
        clear_inode(inode);     /* We must guarantee clearing of inode... */
 }

-typedef struct {
-       __le32  *p;
-       __le32  key;
-       struct buffer_head *bh;
-} Indirect;
-
 static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
 {
        p->key = *(p->p = v);
@@ -352,18 +368,21 @@ static int ext3_block_to_path(struct ino
  *     the whole chain, all way to the data (returns %NULL, *err == 0).
  */
 static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
-                                Indirect chain[4], int *err)
+                                Indirect chain[4], int ind_readahead, int *err)
 {
        struct super_block *sb = inode->i_sb;
        Indirect *p = chain;
        struct buffer_head *bh;
+       int index;

        *err = 0;
        /* i_data is not going away, no lock needed */
        add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
        if (!p->key)
                goto no_block;
-       while (--depth) {
+       for (index = 0; index < depth - 1; index++) {
+               if (ind_readahead && depth > 2 && index == depth - 2)
+                       break;
                bh = sb_bread(sb, le32_to_cpu(p->key));
                if (!bh)
                        goto failure;
@@ -396,7 +415,15 @@ no_block:
  *     It is used when heuristic for sequential allocation fails.
  *     Rules are:
  *       + if there is a block to the left of our position - allocate near it.
- *       + if pointer will live in indirect block - allocate near that block.
+ *       + If the METACLUSTER option is not specified, allocate the data
+ *       block close to the metadata block. Otherwise, if pointer will live in
+ *       indirect block, we cannot allocate near the indirect block since
+ *       indirect blocks are allocated in a reserved area. Even if we allocate
+ *       this block right after the preceding logical file block, we'll still
+ *       have to incur extra seek due to the indirect block (unless we
+ *       prefetch the indirect block separately). So for now (until
+ *       prefetching is turned on), it's OK not to return a sequential goal -
+ *       just put in the same cylinder group as the inode.
  *       + if pointer will live in inode - allocate in the same
  *         cylinder group.
  *
@@ -421,9 +448,11 @@ static ext3_fsblk_t ext3_find_near(struc
                        return le32_to_cpu(*p);
        }

-       /* No such thing, so let's try location of indirect block */
-       if (ind->bh)
-               return ind->bh->b_blocknr;
+       if (!test_opt(inode->i_sb, METACLUSTER)) {
+               /* No such thing, so let's try location of indirect block */
+               if (ind->bh)
+                       return ind->bh->b_blocknr;
+       }

        /*
         * It is going to be referred to from the inode itself? OK, just put it
@@ -475,8 +504,7 @@ static ext3_fsblk_t ext3_find_goal(struc
  *     @blks: number of data blocks to be mapped.
  *     @blocks_to_boundary:  the offset in the indirect block
  *
- *     return the total number of blocks to be allocate, including the
- *     direct and indirect blocks.
+ *     return the total number of direct blocks to be allocated.
  */
 static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
                int blocks_to_boundary)
@@ -508,22 +536,39 @@ static int ext3_blks_to_allocate(Indirec
  *     ext3_alloc_blocks: multiple allocate blocks needed for a branch
  *     @indirect_blks: the number of blocks need to allocate for indirect
  *                     blocks
- *
+ *     @blks: the number of direct blocks to be allocated
  *     @new_blocks: on return it will store the new block numbers for
  *     the indirect blocks(if needed) and the first direct block,
- *     @blks:  on return it will store the total number of allocated
- *             direct blocks
+ *
+ *     returns the number of direct blocks allocated, error via *err, and
+ *     new block numbers via new_blocks[]
  */
 static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
                        ext3_fsblk_t goal, int indirect_blks, int blks,
                        ext3_fsblk_t new_blocks[4], int *err)
 {
+       struct super_block *sb;
+       struct ext3_super_block *es;
        int target, i;
-       unsigned long count = 0;
+       unsigned long count = 0, goal_group;
        int index = 0;
        ext3_fsblk_t current_block = 0;
        int ret = 0;

+       BUG_ON(blks <= 0);
+
+       sb = inode->i_sb;
+       if (!sb) {
+               printk(KERN_INFO "ext3_alloc_blocks: nonexistent device");
+               *err = -ENODEV;
+               return 0;
+       }
+       es = EXT3_SB(sb)->s_es;
+
+       if (goal < le32_to_cpu(es->s_first_data_block) ||
+           goal >= le32_to_cpu(es->s_blocks_count))
+               goal = le32_to_cpu(es->s_first_data_block);
+
        /*
         * Here we try to allocate the requested multiple blocks at once,
         * on a best-effort basis.
@@ -534,6 +579,41 @@ static int ext3_alloc_blocks(handle_t *h
         */
        target = blks + indirect_blks;

+       /*
+        * Try to allocate indirect blocks in the metacluster region of block
+        * group in which goal falls. This should not only give us clustered
+        * metablock allocation, but also allocate new metablocks close to the
+        * corresponding data blocks (by putting them in the same block group).
+        * Note that allocation of indirect blocks is only guided by goal and
+        * not by reservation window since the goal mostly falls within the
+        * reservation window for sequential allocation.
+        * If the indirect blocks could not be allocated in this block group,
+        * we fall back to sequential allocation of indirect block alongside
+        * the data block instead of trying other block groups as that can
+        * separate indirect and data blocks too far out.
+        */
+       if (test_opt(sb, METACLUSTER) && indirect_blks) {
+               count = indirect_blks;
+               goal_group = (goal - le32_to_cpu(es->s_first_data_block)) /
+                               EXT3_BLOCKS_PER_GROUP(sb);
+               *err = ext3_new_indirect_blocks(handle, inode, goal_group,
+                                               &count, new_blocks + index);
+               if (*err && *err != -ENOSPC) {
+                       printk(KERN_ERR "ext3_alloc_blocks failed to allocate "
+                               "indirect blocks: %d", *err);
+                       goto failed_out;
+               } else if (*err == 0) {
+                       BUG_ON(count == 0);
+               }
+               *err = 0;
+
+               if (count > 0) {
+                       index += count;
+                       target -= count;
+                       BUG_ON(index > indirect_blks);
+               }
+       }
+
        while (1) {
                count = target;
                /* allocating blocks for indirect blocks and direct blocks */
@@ -542,7 +622,7 @@ static int ext3_alloc_blocks(handle_t *h
                        goto failed_out;

                target -= count;
-               /* allocate blocks for indirect blocks */
+               /* store indirect block numbers we just allocated */
                while (index < indirect_blks && count) {
                        new_blocks[index++] = current_block++;
                        count--;
@@ -570,10 +650,14 @@ failed_out:
  *     @inode: owner
  *     @indirect_blks: number of allocated indirect blocks
  *     @blks: number of allocated direct blocks
+ *     @goal: goal for allocation
  *     @offsets: offsets (in the blocks) to store the pointers to next.
  *     @branch: place to store the chain in.
  *
- *     This function allocates blocks, zeroes out all but the last one,
+ *     returns error and number of direct blocks allocated via *blks
+ *
+ *     This function allocates indirect_blks + *blks blocks, zeroes out all
+ *     indirect blocks,
  *     links them into chain and (if we are synchronous) writes them to disk.
  *     In other words, it prepares a branch that can be spliced onto the
  *     inode. It stores the information about that chain in the branch[], in
@@ -799,17 +883,24 @@ int ext3_get_blocks_handle(handle_t *han
        int blocks_to_boundary = 0;
        int depth;
        struct ext3_inode_info *ei = EXT3_I(inode);
-       int count = 0;
+       int count = 0, ind_readahead;
        ext3_fsblk_t first_block = 0;

-
+       BUG_ON(!create &&
+               iblock >= (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+                                       inode->i_sb->s_blocksize_bits);
        J_ASSERT(handle != NULL || create == 0);
        depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);

        if (depth == 0)
                goto out;

-       partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+       ind_readahead = !create && depth > 2;
+       partial = ext3_get_branch(inode, depth, offsets, chain,
+                                 ind_readahead, &err);
+       if (!partial && ind_readahead)
+               partial = ext3_read_indblocks(inode, iblock, depth,
+                                             offsets, chain, &err);

        /* Simplest case - block found, no allocation needed */
        if (!partial) {
@@ -844,7 +935,7 @@ int ext3_get_blocks_handle(handle_t *han
        }

        /* Next simple case - plain lookup or failed read of indirect block */
-       if (!create || err == -EIO)
+       if (!create || (err && err != -EAGAIN))
                goto cleanup;

        mutex_lock(&ei->truncate_mutex);
@@ -866,7 +957,8 @@ int ext3_get_blocks_handle(handle_t *han
                        brelse(partial->bh);
                        partial--;
                }
-               partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+               partial = ext3_get_branch(inode, depth, offsets, chain, 0,
+                                       &err);
                if (!partial) {
                        count++;
                        mutex_unlock(&ei->truncate_mutex);
@@ -1974,7 +2066,7 @@ static Indirect *ext3_find_shared(struct
        /* Make k index the deepest non-null offest + 1 */
        for (k = depth; k > 1 && !offsets[k-1]; k--)
                ;
-       partial = ext3_get_branch(inode, k, offsets, chain, &err);
+       partial = ext3_get_branch(inode, k, offsets, chain, 0, &err);
        /* Writer: pointers */
        if (!partial)
                partial = chain + k-1;
@@ -3297,3 +3389,508 @@ int ext3_change_inode_journal_flag(struc

        return err;
 }
+
+/*
+ * ext3_ind_read_end_bio --
+ *
+ *     bio callback for read IO issued from ext3_read_indblocks.
+ *     Will be called only once, when all I/O has completed.
+ *     Frees read_info and bio.
+ */
+static void ext3_ind_read_end_bio(struct bio *bio, int err)
+{
+       struct ext3_ind_read_info *read_info = bio->bi_private;
+       struct buffer_head *bh;
+       int uptodate = !err && test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int i;
+
+       BUG_ON(read_info->count <= 0);
+
+       if (err == -EOPNOTSUPP)
+               set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+
+       for (i = 0; i < read_info->count; i++) {
+               bh = read_info->bh[i];
+               BUG_ON(bh == NULL);
+
+               if (err == -EOPNOTSUPP)
+                       set_bit(BH_Eopnotsupp, &bh->b_state);
+
+               if (uptodate) {
+                       BUG_ON(buffer_uptodate(bh));
+                       BUG_ON(ext3_buffer_prefetch(bh));
+                       set_buffer_uptodate(bh);
+                       if (read_info->seq_prefetch)
+                               ext3_set_buffer_prefetch(bh);
+               }
+
+               unlock_buffer(bh);
+               brelse(bh);
+       }
+
+       kfree(read_info);
+       bio_put(bio);
+}
+
+/*
+ * ext3_get_max_read --
+ *     @inode: inode of file.
+ *     @block: block number in file (starting from zero).
+ *     @offset_in_dind_block: offset of the indirect block inside its
+ *     parent doubly-indirect block.
+ *
+ *      Compute the maximum no. of indirect blocks that can be read
+ *      satisfying following constraints:
+ *              - Don't read indirect blocks beyond the end of current
+ *              doubly-indirect block.
+ *              - Don't read beyond eof.
+ */
+static inline unsigned long ext3_get_max_read(const struct inode *inode,
+                                                 int block,
+                                                 int offset_in_dind_block)
+{
+       const struct super_block *sb = inode->i_sb;
+       unsigned long max_read;
+       unsigned long ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+       unsigned long ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
+       unsigned long blocks_in_file =
+               (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+       unsigned long remaining_ind_blks_in_dind =
+               (ptrs >= offset_in_dind_block) ? (ptrs - offset_in_dind_block)
+                                              : 0;
+       unsigned long remaining_ind_blks_before_eof =
+               ((blocks_in_file - EXT3_NDIR_BLOCKS + ptrs - 1) >> ptrs_bits) -
+               ((block - EXT3_NDIR_BLOCKS) >> ptrs_bits);
+
+       BUG_ON(block >= blocks_in_file);
+
+       max_read = min_t(unsigned long, remaining_ind_blks_in_dind,
+                        remaining_ind_blks_before_eof);
+
+       BUG_ON(max_read < 1);
+
+       return max_read;
+}
+
+static void ext3_read_indblocks_submit(struct bio **pbio,
+                                       struct ext3_ind_read_info **pread_info,
+                                       int *read_cnt, int seq_prefetch)
+{
+       struct bio *bio = *pbio;
+       struct ext3_ind_read_info *read_info = *pread_info;
+
+       BUG_ON(*read_cnt < 1);
+
+       read_info->seq_prefetch = seq_prefetch;
+       read_info->count = *read_cnt;
+       read_info->size = bio->bi_size;
+       bio->bi_private = read_info;
+       bio->bi_end_io = ext3_ind_read_end_bio;
+       submit_bio(READ, bio);
+
+       *pbio = NULL;
+       *pread_info = NULL;
+       *read_cnt = 0;
+}
+
+/*
+ * ext3_read_indblocks_async --
+ *      @sb:            super block
+ *      @ind_blocks[]:  array of indirect block numbers on disk
+ *      @count:         maximum number of indirect blocks to read
+ *      @first_bh:      buffer_head for indirect block ind_blocks[0], may be
+ *                      NULL
+ *      @seq_prefetch:  if this is part of a sequential prefetch and buffers'
+ *                      prefetch bit must be set.
+ *      @blocks_done:   number of blocks considered for prefetching.
+ *
+ *      Issue a single bio request to read up to count buffers identified in
+ *      ind_blocks[]. Fewer than count buffers may be read in some cases:
+ *      - If a buffer is found to be uptodate and its prefetch bit is set, we
+ *      don't look at any more buffers as they will most likely be in the
+ *      cache.
+ *      - We skip buffers we cannot lock without blocking (except for first_bh
+ *      if specified).
+ *      - We skip buffers beyond a certain range on disk.
+ *
+ *      This function must issue read on first_bh if specified unless of course
+ *      it's already uptodate.
+ */
+static int ext3_read_indblocks_async(struct super_block *sb,
+                                    __le32 ind_blocks[], int count,
+                                    struct buffer_head *first_bh,
+                                    int seq_prefetch,
+                                    unsigned long *blocks_done)
+{
+       struct buffer_head *bh;
+       struct bio *bio = NULL;
+       struct ext3_ind_read_info *read_info = NULL;
+       int read_cnt = 0, blk;
+       ext3_fsblk_t prev_blk = 0, io_start_blk = 0, curr;
+       int err = 0;
+
+       BUG_ON(count < 1);
+       /* Don't move this to ext3_get_max_read() since callers often need to
+        * trim the count returned by that function. So this bound must only
+        * be imposed at the last moment. */
+       count = min_t(unsigned long, count, EXT3_IND_READ_MAX);
+       *blocks_done = 0UL;
+
+       if (count == 1 && first_bh) {
+               lock_buffer(first_bh);
+               get_bh(first_bh);
+               first_bh->b_end_io = end_buffer_read_sync;
+               submit_bh(READ, first_bh);
+               *blocks_done = 1UL;
+               return 0;
+       }
+
+       for (blk = 0; blk < count; blk++) {
+               curr = le32_to_cpu(ind_blocks[blk]);
+
+               if (!curr)
+                       continue;
+
+               if (io_start_blk > 0) {
+                       if (max(io_start_blk, curr) - min(io_start_blk, curr) >=
+                                       EXT3_IND_READ_MAX)
+                               continue;
+               }
+
+               if (prev_blk > 0 && curr != prev_blk + 1) {
+                       ext3_read_indblocks_submit(&bio, &read_info,
+                                               &read_cnt, seq_prefetch);
+                       prev_blk = 0;
+                       break;
+               }
+
+               if (blk == 0 && first_bh) {
+                       bh = first_bh;
+                       get_bh(first_bh);
+               } else {
+                       bh = sb_getblk(sb, curr);
+                       if (unlikely(!bh)) {
+                               err = -ENOMEM;
+                               goto failure;
+                       }
+               }
+
+               if (buffer_uptodate(bh)) {
+                       if (ext3_buffer_prefetch(bh)) {
+                               brelse(bh);
+                               break;
+                       }
+                       brelse(bh);
+                       continue;
+               }
+
+               /* Lock the buffer without blocking, skipping any buffers
+                * which would require us to block. first_bh when specified is
+                * an exception as caller typically wants it to be read for
+                * sure (e.g., ext3_read_indblocks_sync).
+                */
+               if (bh == first_bh) {
+                       lock_buffer(bh);
+               } else if (test_set_buffer_locked(bh)) {
+                       brelse(bh);
+                       continue;
+               }
+
+               /* Check again with the buffer locked. */
+               if (buffer_uptodate(bh)) {
+                       if (ext3_buffer_prefetch(bh)) {
+                               unlock_buffer(bh);
+                               brelse(bh);
+                               break;
+                       }
+                       unlock_buffer(bh);
+                       brelse(bh);
+                       continue;
+               }
+
+               if (read_cnt == 0) {
+                       /* read_info freed in ext3_ind_read_end_bio(). */
+                       read_info = kmalloc(EXT3_IND_READ_INFO_SIZE(count),
+                                           GFP_KERNEL);
+                       if (unlikely(!read_info)) {
+                               err = -ENOMEM;
+                               goto failure;
+                       }
+
+                       bio = bio_alloc(GFP_KERNEL, count);
+                       if (unlikely(!bio)) {
+                               err = -ENOMEM;
+                               goto failure;
+                       }
+                       bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+                       bio->bi_bdev = bh->b_bdev;
+               }
+
+               if (bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))
+                               < bh->b_size) {
+                       brelse(bh);
+                       if (read_cnt == 0)
+                               goto failure;
+
+                       break;
+               }
+
+               read_info->bh[read_cnt++] = bh;
+
+               prev_blk = curr;
+               if (io_start_blk == 0)
+                       io_start_blk = curr;
+       }
+
+       if (read_cnt == 0)
+               goto done;
+
+       ext3_read_indblocks_submit(&bio, &read_info, &read_cnt, seq_prefetch);
+
+       *blocks_done = blk;
+       return 0;
+
+failure:
+       while (--read_cnt >= 0) {
+               unlock_buffer(read_info->bh[read_cnt]);
+               brelse(read_info->bh[read_cnt]);
+       }
+
+done:
+       if (read_info)
+               kfree(read_info);
+
+       if (bio)
+               bio_put(bio);
+
+       return err;
+}
+
+/*
+ * ext3_read_indblocks_sync --
+ *      @sb:            super block
+ *      @ind_blocks[]:  array of indirect block numbers on disk
+ *      @count:         maximum number of indirect blocks to read
+ *      @first_bh:      buffer_head for indirect block ind_blocks[0], must be
+ *                      non-NULL.
+ *      @seq_prefetch:  set prefetch bit of buffers, used when this is part of
+ *                      a sequential prefetch.
+ *      @blocks_done:   number of blocks considered for prefetching.
+ *
+ *      Synchronously read at most count indirect blocks listed in
+ *      ind_blocks[]. This function calls ext3_read_indblocks_async() to do all
+ *      the hard work. It waits for read to complete on first_bh before
+ *      returning.
+ */
+
+static int ext3_read_indblocks_sync(struct super_block *sb,
+                                   __le32 ind_blocks[], int count,
+                                   struct buffer_head *first_bh,
+                                   int seq_prefetch,
+                                   unsigned long *blocks_done)
+{
+       int err;
+
+       BUG_ON(count < 1);
+       BUG_ON(!first_bh);
+
+       err = ext3_read_indblocks_async(sb, ind_blocks, count, first_bh,
+                                       seq_prefetch, blocks_done);
+       if (err)
+               return err;
+
+       wait_on_buffer(first_bh);
+       if (!buffer_uptodate(first_bh))
+               err = -EIO;
+
+       /* if seq_prefetch != 0, ext3_read_indblocks_async() sets prefetch bit
+        * for all buffers, but the first buffer for sync IO is never a prefetch
+        * buffer since it is needed right away, so clear its prefetch bit.
+        */
+       if (seq_prefetch)
+               ext3_clear_buffer_prefetch(first_bh);
+
+       BUG_ON(ext3_buffer_prefetch(first_bh));
+
+       return err;
+}
+
+/*
+ * ext3_read_indblocks --
+ *
+ *     @inode: inode of file
+ *     @iblock: block number inside file (starting from 0).
+ *     @depth: depth of path from inode to data block.
+ *     @offsets: array of offsets within blocks identified in 'chain'.
+ *     @chain: array of Indirect with info about all levels of blocks until
+ *     the data block.
+ *     @err: error pointer.
+ *
+ *     This function is called after reading all metablocks leading to 'iblock'
+ *     except the (singly) indirect block. It reads the indirect block if not
+ *     already in the cache and may also prefetch next few indirect blocks.
+ *     It uses a combination of synchronous and asynchronous requests to
+ *     accomplish this. We do prefetching even for random reads by reading
+ *     ahead one indirect block since reads of size >=512KB have at least 12%
+ *     chance of spanning two indirect blocks.
+ */
+
+static Indirect *ext3_read_indblocks(struct inode *inode, int iblock,
+                                    int depth, int offsets[4],
+                                    Indirect chain[4], int *err)
+{
+       struct super_block *sb = inode->i_sb;
+       struct buffer_head *first_bh, *prev_bh;
+       unsigned long max_read, blocks_done = 0;
+       __le32 *ind_blocks;
+
+       /* Must have doubly indirect block for prefetching indirect blocks. */
+       BUG_ON(depth <= 2);
+       BUG_ON(!chain[depth-2].key);
+
+       *err = 0;
+
+       /* Handle first block */
+       ind_blocks = chain[depth-2].p;
+       first_bh = sb_getblk(sb, le32_to_cpu(ind_blocks[0]));
+       if (unlikely(!first_bh)) {
+               printk(KERN_ERR "Failed to get block %u for sb %p\n",
+                      le32_to_cpu(ind_blocks[0]), sb);
+               goto failure;
+       }
+
+       BUG_ON(first_bh->b_size != sb->s_blocksize);
+
+       if (buffer_uptodate(first_bh)) {
+               /* Found the buffer in cache, either it was accessed recently or
+                * it was prefetched while reading previous indirect block(s).
+                * We need to figure out if we need to prefetch the following
+                * indirect blocks.
+                */
+               if (!ext3_buffer_prefetch(first_bh)) {
+                       /* Either we've seen this indirect block before while
+                        * accessing another data block, or this is a random
+                        * read. In the former case, we must have done the
+                        * needful the first time we had a cache hit on this
+                        * indirect block, in the latter case we obviously
+                        * don't need to do any prefetching.
+                        */
+                       goto done;
+               }
+
+               max_read = ext3_get_max_read(inode, iblock,
+                                            offsets[depth-2]);
+
+               /* This indirect block is in the cache due to prefetching and
+                * this is its first cache hit, clear the prefetch bit and
+                * make sure the following blocks are also prefetched.
+                */
+               ext3_clear_buffer_prefetch(first_bh);
+
+               if (max_read >= 2) {
+                       /* ext3_read_indblocks_async() stops at the first
+                        * indirect block which has the prefetch bit set which
+                        * will most likely be the very next indirect block.
+                        */
+                       ext3_read_indblocks_async(sb, &ind_blocks[1],
+                                                 max_read - 1,
+                                                 NULL, 1, &blocks_done);
+               }
+
+       } else {
+               /* Buffer is not in memory, we need to read it. If we are
+                * reading sequentially from the previous indirect block, we
+                * have just detected a sequential read and we must prefetch
+                * some indirect blocks for future.
+                */
+
+               max_read = ext3_get_max_read(inode, iblock,
+                                            offsets[depth-2]);
+
+               if ((ind_blocks - (__le32 *)chain[depth-2].bh->b_data) >= 1) {
+                       prev_bh = sb_getblk(sb, le32_to_cpu(ind_blocks[-1]));
+                       if (buffer_uptodate(prev_bh) &&
+                           !ext3_buffer_prefetch(prev_bh)) {
+                               /* Detected sequential read. */
+                               brelse(prev_bh);
+
+                               /* Sync read indirect block, also read the next
+                                * few indirect blocks.
+                                */
+                               *err = ext3_read_indblocks_sync(sb, ind_blocks,
+                                                        max_read, first_bh, 1,
+                                                        &blocks_done);
+
+                               if (*err)
+                                       goto out;
+
+                               /* In case the very next indirect block is
+                                * discontiguous by a non-trivial amount,
+                                * ext3_read_indblocks_sync() above won't
+                                * prefetch it (indicated by blocks_done < 2).
+                                * So to help sequential read, schedule an
+                                * async request for reading the next
+                                * contiguous indirect block range (which
+                                * in metaclustering case would be the next
+                                * metacluster, without metaclustering it
+                                * would be the next indirect block). This is
+                                * expected to benefit the non-metaclustering
+                                * case.
+                                */
+                               if (max_read >= 2 && blocks_done < 2)
+                                       ext3_read_indblocks_async(sb,
+                                                       &ind_blocks[1],
+                                                       max_read - 1,
+                                                       NULL, 1, &blocks_done);
+
+                               goto done;
+                       }
+                       brelse(prev_bh);
+               }
+
+               /* Either random read, or sequential detection failed above.
+                * We always prefetch the next indirect block in this case
+                * whenever possible.
+                * This is because for random reads of size ~512KB, there is
+                * >12% chance that a read will span two indirect blocks.
+                */
+               *err = ext3_read_indblocks_sync(sb, ind_blocks,
+                                               (max_read >= 2) ? 2 : 1,
+                                               first_bh, 0, &blocks_done);
+               if (*err)
+                       goto out;
+       }
+
+done:
+       /* Reader: pointers */
+       if (!verify_chain(chain, &chain[depth - 2])) {
+               brelse(first_bh);
+               goto changed;
+       }
+       add_chain(&chain[depth - 1], first_bh,
+                 (__le32*)first_bh->b_data + offsets[depth - 1]);
+       /* Reader: end */
+       if (!chain[depth - 1].key)
+               goto out;
+
+       BUG_ON(!buffer_uptodate(first_bh));
+       return NULL;
+
+changed:
+       *err = -EAGAIN;
+       goto out;
+failure:
+       *err = -EIO;
+out:
+       if (*err) {
+               ext3_debug("Error %d reading indirect blocks\n", *err);
+               return &chain[depth - 2];
+       } else
+               return &chain[depth - 1];
+}
+
diff -uprdN linux-2.6.23mm1-clean/fs/ext3/super.c linux-2.6.23mm1-ext3mc/fs/ext3/super.c
--- linux-2.6.23mm1-clean/fs/ext3/super.c       2007-10-17 18:31:42.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/fs/ext3/super.c      2007-11-09 16:46:29.000000000 -0800
@@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_
        else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
                seq_puts(seq, ",data=writeback");

+       if (test_opt(sb, METACLUSTER))
+               seq_puts(seq, ",metacluster");
+
        ext3_show_quota_options(seq, sb);

        return 0;
@@ -758,7 +761,7 @@ enum {
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
-       Opt_grpquota
+       Opt_grpquota, Opt_metacluster
 };

 static match_table_t tokens = {
@@ -808,6 +811,7 @@ static match_table_t tokens = {
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
+       {Opt_metacluster, "metacluster"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
 };
@@ -1140,6 +1144,9 @@ clear_qf_name:
                case Opt_bh:
                        clear_opt(sbi->s_mount_opt, NOBH);
                        break;
+               case Opt_metacluster:
+                       set_opt(sbi->s_mount_opt, METACLUSTER);
+                       break;
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
diff -uprdN linux-2.6.23mm1-clean/include/linux/ext3_fs.h linux-2.6.23mm1-ext3mc/include/linux/ext3_fs.h
--- linux-2.6.23mm1-clean/include/linux/ext3_fs.h       2007-10-17 18:31:43.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/include/linux/ext3_fs.h      2007-11-15 12:03:48.000000000 -0800
@@ -380,6 +380,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_QUOTA               0x80000 /* Some quota option set */
 #define EXT3_MOUNT_USRQUOTA            0x100000 /* "old" user quota */
 #define EXT3_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
+#define EXT3_MOUNT_METACLUSTER         0x400000 /* Indirect block clustering */

 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
@@ -493,6 +494,7 @@ struct ext3_super_block {
 #ifdef __KERNEL__
 #include <linux/ext3_fs_i.h>
 #include <linux/ext3_fs_sb.h>
+#include <linux/buffer_head.h>
 static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
 {
        return sb->s_fs_info;
@@ -722,6 +724,11 @@ struct dir_private_info {
        __u32           next_hash;
 };

+/* Special bh flag used by the metacluster readahead logic. */
+enum ext3_bh_state_bits {
+       EXT3_BH_PREFETCH = BH_JBD_Sentinel,
+};
+
 /* calculate the first block number of the group */
 static inline ext3_fsblk_t
 ext3_group_first_block_no(struct super_block *sb, unsigned long group_no)
@@ -730,6 +737,24 @@ ext3_group_first_block_no(struct super_b
                le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
 }

+static inline void
+ext3_set_buffer_prefetch(struct buffer_head *bh)
+{
+       set_bit(EXT3_BH_PREFETCH, &bh->b_state);
+}
+
+static inline void
+ext3_clear_buffer_prefetch(struct buffer_head *bh)
+{
+       clear_bit(EXT3_BH_PREFETCH, &bh->b_state);
+}
+
+static inline int
+ext3_buffer_prefetch(struct buffer_head *bh)
+{
+       return test_bit(EXT3_BH_PREFETCH, &bh->b_state);
+}
+
 /*
  * Special error return code only used by dx_probe() and its callers.
  */
@@ -752,6 +777,9 @@ extern int ext3_bg_has_super(struct supe
 extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
 extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
                        ext3_fsblk_t goal, int *errp);
+extern int ext3_new_indirect_blocks(handle_t *handle, struct inode *,
+                               unsigned long group_no, unsigned long *,
+                               ext3_fsblk_t new_blocks[]);
 extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
                        ext3_fsblk_t goal, unsigned long *count, int *errp);
 extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
@@ -870,6 +898,31 @@ extern const struct inode_operations ext
 extern const struct inode_operations ext3_symlink_inode_operations;
 extern const struct inode_operations ext3_fast_symlink_inode_operations;

+/*
+ * ext3_get_grp_metacluster:
+ *
+ *     Determines metacluster block range for all block groups of the file
+ *     system.
+ *
+ *     Number of metacluster blocks = blocks_per_group/128. This allows us
+ *     to fit all indirect blocks in a block group with an average file size of
+ *     256KB into the group's metacluster. We want to avoid having large
+ *     metaclusters because then we'll run out of data blocks sooner, and once
+ *     we are out of data blocks metaclustering goes for a toss.
+ *
+ */
+static inline void
+ext3_get_grp_metacluster(struct super_block *sb,
+                               ext3_grpblk_t *mc_start,
+                               ext3_grpblk_t *mc_end)  /* exclusive */
+{
+       *mc_start = EXT3_BLOCKS_PER_GROUP(sb) / 2;
+       if (test_opt(sb, METACLUSTER)) {
+               *mc_end = *mc_start + (EXT3_BLOCKS_PER_GROUP(sb) >> 7);
+       } else {
+               *mc_end = *mc_start;
+       }
+}

 #endif /* __KERNEL__ */

diff -uprdN linux-2.6.23mm1-clean/include/linux/jbd.h linux-2.6.23mm1-ext3mc/include/linux/jbd.h
--- linux-2.6.23mm1-clean/include/linux/jbd.h   2007-10-17 18:31:43.000000000 -0700
+++ linux-2.6.23mm1-ext3mc/include/linux/jbd.h  2007-11-09 16:46:29.000000000 -0800
@@ -294,6 +294,7 @@ enum jbd_state_bits {
        BH_State,               /* Pins most journal_head state */
        BH_JournalHead,         /* Pins bh->b_private and jh->b_bh */
        BH_Unshadow,            /* Dummy bit, for BJ_Shadow wakeup filtering */
+       BH_JBD_Sentinel,        /* Start bit for clients of jbd */
 };

 BUFFER_FNS(JBD, jbd)