FLEX_BG support for the e2fsprogs interim branch.

2008-02-19 Thread Jose R. Santos
Hi Ted,

I've attached two patches to add FLEX_BG support on the interim branch
of e2fsprogs.  The first patch is the grouping of 3 patches already
available in the e2fsprogs-next branch for basic feature support while
the second patch is the meta-data grouping patch that is still waiting
on the e2fsprogs-pu branch.


-JRS
From: Jose R. Santos <[EMAIL PROTECTED]>

This patch adds basic flex_bg support to the e2fsprogs-interim branch.

---

 e2fsck/super.c  |7 +--
 lib/e2p/feature.c   |2 ++
 lib/ext2fs/check_desc.c |9 ++---
 lib/ext2fs/ext2_fs.h|1 +
 lib/ext2fs/ext2fs.h |6 --
 misc/mke2fs.c   |3 ++-
 misc/tune2fs.c  |   13 +
 7 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/e2fsck/super.c b/e2fsck/super.c
index 0b17c48..581e8fe 100644
--- a/e2fsck/super.c
+++ b/e2fsck/super.c
@@ -584,8 +584,11 @@ void check_super_block(e2fsck_t ctx)
 	for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
 		pctx.group = i;
 
-		first_block = ext2fs_group_first_block(fs, i);
-		last_block = ext2fs_group_last_block(fs, i);
+		if (!EXT2_HAS_INCOMPAT_FEATURE(fs->super,
+	   EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+			first_block = ext2fs_group_first_block(fs, i);
+			last_block = ext2fs_group_last_block(fs, i);
+		}
 
 		if ((gd->bg_block_bitmap < first_block) ||
 		(gd->bg_block_bitmap > last_block)) {
diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index f111ddd..a9791b4 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -67,6 +67,8 @@ static struct feature feature_list[] = {
 			"extent" },
 	{	E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_64BIT,
 			"64bit" },
+	{   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_FLEX_BG,
+			"flex_bg"},
 	{   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_MMP,
 			"mmp" },
 	{	0, 0, 0 },
diff --git a/lib/ext2fs/check_desc.c b/lib/ext2fs/check_desc.c
index 146f9e5..900b179 100644
--- a/lib/ext2fs/check_desc.c
+++ b/lib/ext2fs/check_desc.c
@@ -33,13 +33,16 @@ errcode_t ext2fs_check_desc(ext2_filsys fs)
 {
 	dgrp_t i;
 	blk_t first_block = fs->super->s_first_data_block;
-	blk_t last_block;
+	blk_t last_block = fs->super->s_blocks_count-1;
 
 	EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
 
 	for (i = 0; i < fs->group_desc_count; i++) {
-		first_block = ext2fs_group_first_block(fs, i);
-		last_block = ext2fs_group_last_block(fs, i);
+		if (!EXT2_HAS_INCOMPAT_FEATURE(fs->super,
+	   EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+			first_block = ext2fs_group_first_block(fs, i);
+			last_block = ext2fs_group_last_block(fs, i);
+		}
 
 		/*
 		 * Check to make sure block bitmap for group is
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 9218e42..412b49b 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -661,6 +661,7 @@ struct ext2_super_block {
 #define EXT3_FEATURE_INCOMPAT_EXTENTS		0x0040
 #define EXT4_FEATURE_INCOMPAT_64BIT		0x0080
 #define EXT4_FEATURE_INCOMPAT_MMP		0x0100
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
 
 
 #define EXT2_FEATURE_COMPAT_SUPP	0
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 1d86fa1..b34aff1 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -482,14 +482,16 @@ typedef struct ext2_icount *ext2_icount_t;
 	 EXT2_FEATURE_INCOMPAT_META_BG|\
 	 EXT3_FEATURE_INCOMPAT_EXTENTS|\
 	 EXT3_FEATURE_INCOMPAT_RECOVER|\
-	 EXT4_FEATURE_INCOMPAT_MMP)
+	 EXT4_FEATURE_INCOMPAT_MMP|\
+	 EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #else
 #define EXT2_LIB_FEATURE_INCOMPAT_SUPP	(EXT2_FEATURE_INCOMPAT_FILETYPE|\
 	 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\
 	 EXT2_FEATURE_INCOMPAT_META_BG|\
 	 EXT3_FEATURE_INCOMPAT_EXTENTS|\
 	 EXT3_FEATURE_INCOMPAT_RECOVER|\
-	 EXT4_FEATURE_INCOMPAT_MMP)
+	 EXT4_FEATURE_INCOMPAT_MMP|\
+	 EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #endif
 #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP	(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
 	 EXT2_FEATURE_RO_COMPAT_LARGE_FILE|\
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 8210c3b..0184af7 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -922,7 +922,8 @@ static __u32 ok_features[3] = {
 	EXT2_FEATURE_INCOMPAT_FILETYPE|		/* Incompat */
 		EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|
 		EXT2_FEATURE_INCOMPAT_META_BG|
-		EXT4_FEATURE_INCOMPAT_MMP,
+		EXT4_FEATURE_INCOMPAT_MMP|
+		EXT4_FEATURE_INCOMPAT_FLEX_BG,
 	EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|	/* R/O compat */
 		EXT4_FEATURE_RO_COMPAT_GDT_CSUM
 };
diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index fad4812..d37ceb1 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -303,6 +303,7 @@ static void update_feature_set(ext2_filsys fs, char *features)
 {
 	int sparse, old_sparse, filetype, old_filetype;
 	int journal, old_journal, dxdir, old_dxdir, uninit;
+	int flex_bg, old_flex_bg;
 	int mmp, old_mmp;
 	struct ext2_super_block *sb= fs->super;
 	int dir_nlin

[PATCH] e2fsprogs: New bitmap and inode table allocation for FLEX_BG v2

2008-02-13 Thread Jose R. Santos
New bitmap and inode table allocation for FLEX_BG

From: Jose R. Santos <[EMAIL PROTECTED]>

Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  It calculates a new offset for the
bitmaps and inode tables based on the number of groups that the user
wishes to pack together using the new "-G" option.  Creating a
filesystem with 32 block groups in a flex group can be done by:

mke2fs -j -I 256 -O flex_bg -G 32 /dev/sdX

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-off-by: Valerie Clement <[EMAIL PROTECTED]>
---

 lib/ext2fs/alloc_tables.c |  122 -
 lib/ext2fs/closefs.c  |6 +-
 lib/ext2fs/ext2_fs.h  |6 ++
 lib/ext2fs/initialize.c   |6 ++
 misc/mke2fs.8.in  |   15 ++
 misc/mke2fs.c |   24 -
 6 files changed, 171 insertions(+), 8 deletions(-)

diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..043293b 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,18 +27,88 @@
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
+void ext2fs_bgd_set_flex_meta_flag(ext2_filsys fs, blk_t block)
+{
+   dgrp_t  group;
+
+   group = ext2fs_group_of_blk(fs, block);
+   if (!(fs->group_desc[group].bg_flags & EXT2_BG_FLEX_METADATA))
+   fs->group_desc[group].bg_flags |= EXT2_BG_FLEX_METADATA;
+}
+
+/*
+ * This routine searches for free blocks that can allocate a full
+ * group of bitmaps or inode tables for a flexbg group.  Returns the
+ * block number with a correct offset were the bitmaps and inode
+ * tables can be allocated continously and in order.
+ */
+blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk,
+  ext2fs_block_bitmap bmap, int offset, int size)
+{
+   int flexbg, flexbg_size, elem_size;
+   blk_t   last_blk, first_free = 0;
+   dgrp_t  last_grp;
+
+   flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+   flexbg = group / flexbg_size;
+
+   if (size > fs->super->s_blocks_per_group / 8)
+   size = fs->super->s_blocks_per_group / 8;
+
+   /*
+* Dont do a long search if the previous block
+* search is still valid.
+*/
+   if (start_blk && group % flexbg_size) {
+   if (size > flexbg_size)
+   elem_size = fs->inode_blocks_per_group;
+   else
+   elem_size = 1;
+   if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size,
+  size))
+   return start_blk + elem_size;
+   }
+
+   start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
+   last_grp = group | (flexbg_size - 1);
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+
+   /* Find the first available block */
+   if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap,
+  &first_free))
+   return first_free;
+
+   if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size,
+  bmap, &first_free))
+   return first_free;
+
+   return first_free;
+}
+
 errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
  ext2fs_block_bitmap bmap)
 {
errcode_t   retval;
blk_t   group_blk, start_blk, last_blk, new_blk, blk;
-   int j;
+   dgrp_t  last_grp;
+   int j, rem_grps, flexbg_size = 0;
 
group_blk = ext2fs_group_first_block(fs, group);
last_blk = ext2fs_group_last_block(fs, group);
 
if (!bmap)
bmap = fs->block_map;
+
+   if (EXT2_HAS_INCOMPAT_FEATURE(fs->super,
+  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+   last_grp = group | (flexbg_size - 1);
+   rem_grps = last_grp - group;
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+   }

/*
 * Allocate the block and inode bitmaps, if necessary
@@ -56,6 +126,15 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
} else
start_blk = group_blk;
 
+   if (flexbg_size) {
+   int prev_block = 0;
+   if (group && fs->group_desc[group-1].bg_block_bitmap)
+   prev_block = fs->group_desc[group-1].bg_block_bitmap;
+   start_blk = ext2fs_flexbg_offse

Re: [PATCH] e2fsprogs: New bitmap and inode table allocation for FLEX_BG

2008-02-11 Thread Jose R. Santos
On Sun, 10 Feb 2008 23:33:51 -0500
Theodore Tso <[EMAIL PROTECTED]> wrote:

> On Fri, Feb 08, 2008 at 11:37:40AM -0600, Jose R. Santos wrote:
> > > >  #define EXT2_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not 
> > > > initialized */
> > > >  #define EXT2_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not initialized 
> > > > */
> > > > +#define EXT2_BG_FLEX_METADATA  0x0004 /* FLEX_BG block group contains 
> > > > meta-data */
> > > 
> > > Hrm, I thought I had reserved that value in the uninit_groups patch?
> > > +#define EXT3_BG_INODE_ZEROED   0x0004  /* On-disk itable initialized to 
> > > zero */
> > 
> > It may have been; I just based the patch on the next branch, as Ted had
> > asked for new e2fsprogs patches.  The uninit group patch was not part of
> > the next branch when I pulled.
> 
> Yes, but whenever you start reserving code points that impact the
> on-disk format, you need to be careful and coordinate.  What exactly is
> the purpose of this flag, and why is it here?

Will fix.

> And I don't see any patch in the kernel patch queue that uses this
> flag.  Is this intended for internal use inside e2fsprogs?  If so,
> this might not be the best place for it.
> 
>- Ted

Currently, this is only used in e2fsprogs to determine which groups to
avoid when setting the EXT2_BG_BLOCK_UNINIT flag.  It will be used in
ext4_init_block_bitmap() to return the right number of free blocks when
a block group does not have any meta-data in it.  Eventually, it would
be nice to accurately and efficiently calculate the number of meta-data
blocks used for a flexbg and be able to have these block groups
uninitialized as well.  This flag will be used to determine which groups
need to have their meta-data block usage calculated.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] e2fsprogs: New bitmap and inode table allocation for FLEX_BG

2008-02-08 Thread Jose R. Santos
On Fri, 08 Feb 2008 00:11:16 -0500
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Feb 07, 2008  11:09 -0600, Jose R. Santos wrote:
> > +blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk,
> > +  ext2fs_block_bitmap bmap, int offset, int size)
> > +{
> 
> Can you add a comment on the intent of this function.  By the name it seems
> to be calculating some offset relative to the flex group, but that doesn't
> convey anything about searching for free blocks???

I will add a comment.  The function calculates where the search for
bitmaps/inode tables for a given block group starts by returning a block
number where all of the bitmaps/inode tables can be allocated in a
contiguous fashion.  The search for free blocks is needed to determine
where within the flex group we can allocate the meta-data.

> 
> > +   /*
> > +* Dont do a long search if the previous block
> > +* search is still valid.
> > +*/
> 
> What condition here makes the previous search still valid?

We pass the previous allocation as an argument to the function.  If there
is enough space to allocate the rest of the inode tables after the
previous allocation, then there is no need to do a search.  There are two
reasons why this is done.

1) If the size of a flexbg is big enough, searching for
inode_blocks_per_group * flexbg becomes very expensive if there happen
to be some blocks in the middle of where we started the search.  This
easily happens if the combined size of all the inode tables in a flex
group is larger than a single block group.  If the next block group has
super block backups or meta_bg blocks, ext2fs_get_free_blocks() becomes
very expensive.  If we have to do such an expensive search, better do it
once.  This is also a problem when resizing, since there is no telling
what the block usage of those last groups would be.

2) This avoids having inode tables or bitmaps allocated out of
order.  The search for very large blocks can leave empty space at the
beginning of a flex group.  The tables for the last groups in the flex
group could actually be placed in these smaller empty blocks, which would
put things out of order.

> > +   if (start_blk && group % flexbg_size) {
> > +   if (size > flexbg_size)
> > +   elem_size = fs->inode_blocks_per_group;
> > +   else
> > +   elem_size = 1;
> > +   if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size,
> > +  size))
> > +   return start_blk + elem_size;
> > +   }
> > +
> > +   start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
> > +   last_grp = (group + (flexbg_size - (group % flexbg_size) - 1));
> 
> This is a confusing calculation...  Is it trying to find the last group
> in the flex group that "group" is part of?  I.e. round "group" to the
> end of the flex group?  Since flexbg_size is a power-of-two value, you
> could just do "last_grp = group | (flexbg_size - 1)"?

Yes, I will fix that.
 
> > +   last_blk = ext2fs_group_last_block(fs, last_grp);
> > +   if (last_grp > fs->group_desc_count)
> > +   last_grp = fs->group_desc_count;
> 
> Doesn't it make more sense to calculate "last_blk" AFTER limiting
> "last_grp" to the actual number of groups in the filesystem?

Oops...  Thanks for catching that.
 
> > +   /* Find the first available block */
> > +   if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap,
> > +  &first_free))
> > +   return first_free;
> > +
> > +   if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size,
> > +  bmap, &first_free))
> > +   return first_free;
> 
> I don't quite understand this either?  The first search is looking for a
> single free block between "start_blk" and "last_blk", while the second
> search is looking for "size" free blocks between "first_free + offset"
> and "last_blk".  What is the reason to do the second search after doing
> the first one, or alternately just doing the second one directly?

Because the second search starts from the first free block + an
offset.  This is used in order to have space to allocate each group of
inode/block bitmaps and inode tables contiguously.  I can't do the second
search directly without knowing where I should start the search.

> Should both of these calls actually be saving the error return value and
> returning that to a caller (returning first_free via a pointer arg like
> ext2fs_get_free_blocks() does)?

Failure to find a contiguous set of blocks for all th

[PATCH] e2fsprogs: New bitmap and inode table allocation for FLEX_BG

2008-02-07 Thread Jose R. Santos
New bitmap and inode table allocation for FLEX_BG

From: Jose R. Santos <[EMAIL PROTECTED]>

Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  It calculates a new offset for the
bitmaps and inode tables based on the number of groups that the user
wishes to pack together using the new "-G" option.  Creating a
filesystem with 32 block groups in a flex group can be done by:

mke2fs -j -I 256 -O flex_bg -G 32 /dev/sdX

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-off-by: Valerie Clement <[EMAIL PROTECTED]>
---

 lib/ext2fs/alloc_tables.c |  116 -
 lib/ext2fs/closefs.c  |6 ++
 lib/ext2fs/ext2_fs.h  |6 ++
 lib/ext2fs/initialize.c   |6 ++
 misc/mke2fs.c |   25 +-
 5 files changed, 151 insertions(+), 8 deletions(-)


diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..8281858 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,18 +27,82 @@
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
+void ext2fs_bgd_set_flex_meta_flag(ext2_filsys fs, blk_t block)
+{
+   dgrp_t  group;
+
+   group = ext2fs_group_of_blk(fs, block);
+   if (!(fs->group_desc[group].bg_flags & EXT2_BG_FLEX_METADATA))
+   fs->group_desc[group].bg_flags |= EXT2_BG_FLEX_METADATA;
+}
+
+blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk,
+  ext2fs_block_bitmap bmap, int offset, int size)
+{
+   int flexbg, flexbg_size, elem_size;
+   blk_t   last_blk, first_free = 0;
+   dgrp_t  last_grp;
+
+   flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+   flexbg = group / flexbg_size;
+
+   if (size > fs->super->s_blocks_per_group / 8)
+   size = fs->super->s_blocks_per_group / 8;
+
+   /*
+* Dont do a long search if the previous block
+* search is still valid.
+*/
+   if (start_blk && group % flexbg_size) {
+   if (size > flexbg_size)
+   elem_size = fs->inode_blocks_per_group;
+   else
+   elem_size = 1;
+   if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size,
+  size))
+   return start_blk + elem_size;
+   }
+
+   start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
+   last_grp = (group + (flexbg_size - (group % flexbg_size) - 1));
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+
+   /* Find the first available block */
+   if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap,
+  &first_free))
+   return first_free;
+
+   if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size,
+  bmap, &first_free))
+   return first_free;
+
+   return first_free;
+}
+
 errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
  ext2fs_block_bitmap bmap)
 {
errcode_t   retval;
blk_t   group_blk, start_blk, last_blk, new_blk, blk;
-   int j;
+   dgrp_t  last_grp;
+   int j, rem_grps, flexbg_size = 0;
 
group_blk = ext2fs_group_first_block(fs, group);
last_blk = ext2fs_group_last_block(fs, group);
 
if (!bmap)
bmap = fs->block_map;
+
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+   rem_grps = flexbg_size - (group % flexbg_size);
+   last_grp = group + rem_grps - 1;
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+   }

/*
 * Allocate the block and inode bitmaps, if necessary
@@ -56,6 +120,15 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
} else
start_blk = group_blk;
 
+   if (flexbg_size) {
+   int prev_block = 0;
+   if (group && fs->group_desc[group-1].bg_block_bitmap)
+   prev_block = fs->group_desc[group-1].bg_block_bitmap;
+   start_blk = ext2fs_flexbg_offset(fs, group, prev_block, bmap,
+0, rem_grps);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   }
+
if (!fs->group_desc[group].bg_block_bitmap) {
retval = ext2fs_get_free_blocks(fs, s

Re: [PATCH] New inode allocation for FLEX_BG meta-data groups.

2008-01-11 Thread Jose R. Santos
On Fri, 11 Jan 2008 14:46:58 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Jan 11, 2008  11:28 -0600, Jose R. Santos wrote:
> > @@ -127,6 +127,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, 
> > struct buffer_head *bh,
> > mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
> > }
> >  
> > +   if (sbi->s_log_groups_per_flex)
> > +   return free_blocks;
> > return free_blocks - sbi->s_itb_per_group - 2;
> 
> To be honest, I think this is a wart in ext4_init_block_bitmap() that
> it returns the number of free blocks in the group.  That value should
> really be set at mke2fs or e2fsck time, and if the last group is marked
> BLOCK_UNINIT it gets the free blocks count wrong because it always starts
> with EXT4_BLOCKS_PER_GROUP().
> 
> The above patch may also be incorrect since there may be inode tables or
> bitmaps in the above group even in the case of FLEX_BG filesystems.
> 
> > +#define free_block_ratio 10
> > +
> > +static int find_group_flex(struct super_block *sb, struct inode *parent, 
> > ext4_group_t *best_group)
> > +{
> > +   n_fbg_groups = (sbi->s_groups_count + flex_size - 1) / flex_size;
> 
> Can be a shift?

You're right.  This should be:

n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> 
sbi->s_log_groups_per_flex;

> I would suggest doing some kind of testing to see how well this allocation
> policy is working.  We don't want to force all allocations contiguously at
> the start of the filesystem, or we end up with FAT...

I've run several IO patterns with multiple threads and all of my tests
show either the same or better performance than with the regular allocator.
I'm hoping that moving this to the patch queue will expose the
allocator to more tests.
 
> > +static int ext4_fill_flex_info(struct super_block *sb)
> > +{
> > +   sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
> 
> Hmm, I guess no le*_to_cpu() because this is 8 bits?

Correct.

> > +
> > +   flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
> > +   groups_per_flex;
> > +   sbi->s_flex_groups = kmalloc(flex_group_count *
> > +sizeof(struct flex_groups), GFP_KERNEL);
> > +   if (sbi->s_flex_groups == NULL) {
> > +   printk(KERN_ERR "EXT4-fs: not enough memory\n");
> 
> This should report "not enough memory for N flex groups" or something.

OK.
 
> > @@ -2105,6 +2154,13 @@ static int ext4_fill_super (struct super_block *sb,
> > +   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
> > +   if (!ext4_fill_flex_info(sb)) {
> > +   printk(KERN_ERR
> > +  "EXT4-fs: unable to initialize flex_bg meta 
> > info!\n");
> > +   goto failed_mount2;
> 
> Should this be considered a fatal error, or could sbi->s_log_groups_per_flex
> just be set to 0 and the filesystem be used as-is (maybe with sub-optimal
> allocations or something)?  Otherwise this renders the filesystem unusable.

I thought about doing that but using a sub-optimal allocator would
permanently screw up the ondisk data locality.  Maybe mounting the
filesystem read-only would be more appropriate.

> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 



-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] e2fsprogs: New bitmap and inode table allocation for FLEX_BG

2008-01-11 Thread Jose R. Santos
On Fri, 11 Jan 2008 14:01:04 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Jan 11, 2008  11:28 -0600, Jose R. Santos wrote:
> > +blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, int flexbg_size, 
> > +  ext2fs_block_bitmap bmap, int offset, int size)

OK.
 
> Could you please add some comments for what this function is trying to do?
> 
> > +   last_grp = (group + (flexbg_size - (group % flexbg_size) - 1));
> 
> Is this the same as:
> 
>   last_grp = group + (flexbg_size - 1) / flexbg_size * flexbg_size
>   
> (i.e. trying to round up to the next even multiple of flexbg_size)?
> 
> Didn't we decide to have flexbg_size be a power-of-two value, so we could
> use shift and mask instead of divide and mod?  It's less of an issue because
> group is only a 32-bit value, I guess.

Yes, I fixed this in the kernel code but neglected to fix it here.
 
> > +   if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap, 
> > +  &first_free))
> > +   return first_free;
> > +   
> > +   if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size, 
> > +  bmap, &first_free))
> > +   return first_free;
> > +
> > +   return first_free;
> > +}
> > +
> >  errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
> >   ext2fs_block_bitmap bmap)
> >  {
> > errcode_t   retval;
> > blk_t   group_blk, start_blk, last_blk, new_blk, blk;
> > -   int j;
> > +   dgrp_t  last_grp;
> > +   int j, rem_grps, flexbg_size = 0;
> >  
> > group_blk = ext2fs_group_first_block(fs, group);
> > last_blk = ext2fs_group_last_block(fs, group);
> >  
> > if (!bmap)
> > bmap = fs->block_map;
> > +
> > +   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
> > +  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
> > +   flexbg_size = 1 << fs->super->s_log_groups_per_flex;
> > +   rem_grps = flexbg_size - (group % flexbg_size);
> 
> Hmm, no point in doing "groups % flexbg_size" if we have
> s_log_groups_per_flex.  Could do "groups & (flexbg_size - 1)" instead.
> 
> > @@ -101,7 +102,11 @@ int ext2fs_super_and_bgd_loc(ext2_filsys fs,
> > +   if (flex_bg_size) {
> > +   if ((group % flex_bg_size) == 0)
> > +   numblocks -= 2 + fs->inode_blocks_per_group;
> 
> Ditto.
> 
> > @@ -1045,6 +1046,20 @@ static void PRS(int argc, char *argv[])
> > exit(1);
> > }
> > break;
> > +   case 'G':
> > +   flex_bg_size = strtoul(optarg, &tmp, 0);
> > +   if (*tmp) {
> > +   com_err(program_name, 0,
> > +   _("Illegal number for Flex_BG size"));
> > +   exit(1);
> > +   }
> > +   if (flex_bg_size < 2 || 
> > +   (flex_bg_size & (flex_bg_size-1)) != 0) {
> > +   com_err(program_name, 0,
> > +   _("Flex_BG size must be a power of 2"));
> > +   exit(1);
> > +   }
> > +   break;
> 
> We've been putting new options under "-E var=value"...  I don't know what
> Ted's thoughts are on using new option letters, though this one might qualify.

I thought this would qualify as a new option letter.  Waiting on input
from Ted.

> > @@ -1444,6 +1459,16 @@ static void PRS(int argc, char *argv[])
> > }
> > }
> >  
> > +   if(flex_bg_size) {
> 
> Space after "if ".

Will fix.
 
> > +   shift = 0;
> > +   tmp = flex_bg_size;
> > +   while ((tmp >>= 1UL) != 0UL)
> > +   shift++;
> 
> There isn't a "log2" function?

Couldn't find anything in lib or include.
 
> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 



-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] New inode allocation for FLEX_BG meta-data groups.

2008-01-11 Thread Jose R. Santos
commit 8eef19455beb97319a78511b35b1da42a1d48eb2
Author: Jose R. Santos <[EMAIL PROTECTED]>
Date:   Fri Jan 11 11:04:25 2008 -0600

New inode allocation for FLEX_BG meta-data groups.

This patch mostly controls the way inodes are allocated in order to
make ialloc aware of flex_bg block group grouping.  It achieves this
by bypassing the Orlov allocator when block group meta-data are packed
together through mke2fs.  Since the impact on the block allocator is
minimal, this patch should have little or no effect on other block
allocation algorithms.  By controlling the inode allocation, it can
basically control where the initial search for new blocks begins and
thus indirectly manipulate the block allocator.

This allocator favors data and meta-data locality so the disk will
gradually be filled from block group zero upward.  This helps improve
performance by reducing seek time.  Since the group of inode tables
within one flex_bg is treated as one giant inode table, uninitialized
block groups would not need to partially initialize as many inode
tables as with Orlov, which would help fsck time as the filesystem usage
goes up.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-off-by: Valerie Clement <[EMAIL PROTECTED]>

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 643046b..aed3456 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -127,6 +127,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, 
struct buffer_head *bh,
mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
}
 
+   if (sbi->s_log_groups_per_flex)
+   return free_blocks;
return free_blocks - sbi->s_itb_per_group - 2;
 }
 
@@ -759,6 +761,13 @@ do_more:
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+   if (sbi->s_log_groups_per_flex) {
+   ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
+   spin_lock(sb_bgl_lock(sbi, flex_group));
+   sbi->s_flex_groups[flex_group].free_blocks += count;
+   spin_unlock(sb_bgl_lock(sbi, flex_group));
+   }
+
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
err = ext4_journal_dirty_metadata(handle, bitmap_bh);
@@ -1829,6 +1838,13 @@ allocated:
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
 
+   if (sbi->s_log_groups_per_flex) {
+   ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
+   spin_lock(sb_bgl_lock(sbi, flex_group));
+   sbi->s_flex_groups[flex_group].free_blocks -= num;
+   spin_unlock(sb_bgl_lock(sbi, flex_group));
+   }
+
BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
err = ext4_journal_dirty_metadata(handle, gdp_bh);
if (!fatal)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 575b521..d4e8dea 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -158,6 +158,7 @@ void ext4_free_inode (handle_t *handle, struct inode * 
inode)
struct ext4_super_block * es;
struct ext4_sb_info *sbi;
int fatal = 0, err;
+   ext4_group_t flex_group;
 
if (atomic_read(&inode->i_count) > 1) {
printk ("ext4_free_inode: inode has count=%d\n",
@@ -235,6 +236,12 @@ void ext4_free_inode (handle_t *handle, struct inode * 
inode)
if (is_directory)
percpu_counter_dec(&sbi->s_dirs_counter);
 
+   if (sbi->s_log_groups_per_flex) {
+   flex_group = ext4_flex_group(sbi, block_group);
+   spin_lock(sb_bgl_lock(sbi, flex_group));
+   sbi->s_flex_groups[flex_group].free_inodes++;
+   spin_unlock(sb_bgl_lock(sbi, flex_group));
+   }
}
BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, bh2);
@@ -289,6 +296,75 @@ static int find_group_dir(struct super_block *sb, struct 
inode *parent,
return ret;
 }
 
+#define free_block_ratio 10
+
+static int find_group_flex(struct super_block *sb, struct inode *parent, 
ext4_group_t *best_group)
+{
+   struct ext4_sb_info *sbi = EXT4_SB(sb);
+   struct ext4_group_desc *desc;
+   struct buffer_head *bh;
+   struct flex_groups *flex_group = sbi->s_flex_groups;
+   ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
+   ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
+   ext4_group_t ngroups = sbi->s_groups_count;
+   int flex_size = 

[PATCH] e2fsprogs: New bitmap and inode table allocation for FLEX_BG

2008-01-11 Thread Jose R. Santos
commit 38a4134f29b06229843bfe838c23e28f8d323b86
Author: Jose R. Santos <[EMAIL PROTECTED]>
Date:   Fri Jan 11 11:03:03 2008 -0600

New bitmap and inode table allocation for FLEX_BG

Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  It calculates a new offset for the
bitmaps and inode tables based on the number of groups that the user
wishes to pack together using the new "-G" option.  Creating a
filesystem with 32 block groups in a flex group can be done by:

mke2fs -j -I 256 -O flex_bg -G 32 /dev/sdX

    Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-off-by: Valerie Clement <[EMAIL PROTECTED]>

diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..f85ef97 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,18 +27,55 @@
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
+blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, int flexbg_size, 
+  ext2fs_block_bitmap bmap, int offset, int size)
+{
+   int flexbg;
+   errcode_t   retval;
+   blk_t   start_blk, last_blk, first_free = 0;
+   dgrp_t  last_grp;
+   
+   flexbg = group / flexbg_size;
+
+   last_grp = (group + (flexbg_size - (group % flexbg_size) - 1));
+   start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+
+   if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap, 
+  &first_free))
+   return first_free;
+   
+   if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size, 
+  bmap, &first_free))
+   return first_free;
+
+   return first_free;
+}
+
 errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
  ext2fs_block_bitmap bmap)
 {
errcode_t   retval;
blk_t   group_blk, start_blk, last_blk, new_blk, blk;
-   int j;
+   dgrp_t  last_grp;
+   int j, rem_grps, flexbg_size = 0;
 
group_blk = ext2fs_group_first_block(fs, group);
last_blk = ext2fs_group_last_block(fs, group);
 
if (!bmap)
bmap = fs->block_map;
+
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   flexbg_size = 1 << fs->super->s_log_groups_per_flex;
+   rem_grps = flexbg_size - (group % flexbg_size);
+   last_grp = group + rem_grps - 1;
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+   }

/*
 * Allocate the block and inode bitmaps, if necessary
@@ -56,6 +93,12 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t 
group,
} else
start_blk = group_blk;
 
+   if (flexbg_size) {
+   start_blk = ext2fs_flexbg_offset (fs, group, flexbg_size, bmap,
+ 0, rem_grps);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   }
+
if (!fs->group_desc[group].bg_block_bitmap) {
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
1, bmap, &new_blk);
@@ -68,6 +111,12 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
fs->group_desc[group].bg_block_bitmap = new_blk;
}
 
+   if (flexbg_size) {
+   start_blk = ext2fs_flexbg_offset (fs, group, flexbg_size, bmap,
+ flexbg_size, rem_grps);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   }
+
if (!fs->group_desc[group].bg_inode_bitmap) {
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
1, bmap, &new_blk);
@@ -83,6 +132,13 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
/*
 * Allocate the inode table
 */
+   if (flexbg_size) {
+   group_blk = ext2fs_flexbg_offset (fs, group, flexbg_size, bmap,
+ flexbg_size * 2,
+ fs->inode_blocks_per_group * 
rem_grps);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   }
+
if (!fs->group_desc[group].bg_inode_table) {
retval = ext2fs_get_free_blocks(fs, group_blk, last_blk,
fs->i

Re: What's cooking in e2fsprogs.git (topics) - [RFC] FLEX_BG bmap and itable allocation patch.

2007-12-18 Thread Jose R. Santos
On Mon, 17 Dec 2007 12:11:00 -0500
Theodore Tso <[EMAIL PROTECTED]> wrote:

> Here are the topics that have been cooking.  Commits prefixed
> with '-' are only in 'pu' while commits prefixed with '+' are
> in 'next'.  The topics list the commits in reverse chronological
> order.
...

> * js/flex-bg (Mon Aug 13 23:33:14 2007 -0500) 1 commit
>  - New bitmap and inode table allocation for FLEX_BG

I've started fixing this patch in order to address resize2fs problems
with the previous patch.  I've got a patch that seems to do the right
thing now.  Let me know if you agree with the general approach of the
patch and I'll fix it to put it in shape to get it added to the next
branch.  Still incomplete, but there is enough for you to let me know
if you like this approach better than the previous patch.

-JRS


commit 37570ae6196045ce02a25f6c95fbdd103633bfb5
Author: Jose R. Santos <[EMAIL PROTECTED]>
Date:   Sat Dec 15 08:09:35 2007 -0600

    New bitmap and inode table allocation for FLEX_BG

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>

diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..598a360 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,18 +27,55 @@
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
+blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, int flexbg_size, 
+  ext2fs_block_bitmap bmap, int offset, int size)
+{
+   int flexbg;
+   errcode_t   retval;
+   blk_t   start_blk, last_blk, first_free = 0;
+   dgrp_t  last_grp;
+   
+   flexbg = group/flexbg_size;
+
+   last_grp = (group + (flexbg_size - (group % flexbg_size) - 1));
+   start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+
+   if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap, 
+  &first_free))
+   return first_free;
+   
+   if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size, 
+  bmap, &first_free))
+   return first_free;
+
+   return first_free;
+}
+
 errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
  ext2fs_block_bitmap bmap)
 {
errcode_t   retval;
blk_t   group_blk, start_blk, last_blk, new_blk, blk;
-   int j;
+   dgrp_t  last_grp;
+   int j, rem_grps, flexbg_size = 0;
 
group_blk = ext2fs_group_first_block(fs, group);
last_blk = ext2fs_group_last_block(fs, group);
 
if (!bmap)
bmap = fs->block_map;
+
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   flexbg_size = ext2fs_swab16(fs->super->s_flex_bg_size);
+   rem_grps = flexbg_size - (group % flexbg_size);
+   last_grp = group + rem_grps - 1;
+   if (last_grp > fs->group_desc_count)
+   last_grp = fs->group_desc_count;
+   }

/*
 * Allocate the block and inode bitmaps, if necessary
@@ -56,6 +93,12 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t 
group,
} else
start_blk = group_blk;
 
+   if (flexbg_size) {
+   start_blk = ext2fs_flexbg_offset (fs, group, flexbg_size, bmap,
+ 0, rem_grps);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   }
+
if (!fs->group_desc[group].bg_block_bitmap) {
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
1, bmap, &new_blk);
@@ -68,6 +111,12 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
fs->group_desc[group].bg_block_bitmap = new_blk;
}
 
+   if (flexbg_size) {
+   start_blk = ext2fs_flexbg_offset (fs, group, flexbg_size, bmap,
+ flexbg_size, rem_grps);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   }
+
if (!fs->group_desc[group].bg_inode_bitmap) {
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
1, bmap, &new_blk);
@@ -83,6 +132,13 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
/*
 * Allocate the inode table
 */
+   if (flexbg_size) {
+   group_blk = ext2fs_flexbg_offset (fs, group, flexbg_size, bmap,
+   

Re: [RFC] [PATCH] Flex_BG ialloc awareness V2.

2007-12-14 Thread Jose R. Santos
On Fri, 14 Dec 2007 10:01:06 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:
> Well, I can imagine in some cases that the flexbg will not be completely
> contiguous on disk (e.g. after a filesystem resize, if there are bad
> blocks, etc).  As long as the group descriptors themselves are correct
> (i.e. referencing valid bitmaps/itable) then it shouldn't cause a mount
> failure if the per-group data isn't strictly aligned according to the
> superblock flexbg count.

Yes, the meta-data may not be completely contiguous on the disk as per
the definition of flexbg.  What I was planning on doing was to check the
first, second and last-1 flexbg to see how the meta-data is
arranged.  If none of those flexbgs matches the flexbg size
in the super block then we can set sbi->s_groups_per_flex_shift to zero
which would make the fs fall back to Orlov.
 
> We would need to validate the group descriptor separately though (e.g.
> group checksums).

Agree
 
> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] [PATCH] Flex_BG ialloc awareness V2.

2007-12-13 Thread Jose R. Santos
On Thu, 13 Dec 2007 15:58:57 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Dec 13, 2007  09:51 -0600, Jose R. Santos wrote:
> > Now, storing the bits only guaranties that the flexbg size is always a
> > power-of-two and does not guarantee that the super block flexbg size
> > represents the actual meta-data grouping on disk.  For this we need to
> > verify that the bitmap offsets match what the super block reports.  It
> > may be an unlikely scenario, but it may be worth it to check this as
> > well at mount time.
> 
> I'm not sure what you mean...  Isn't the flexbg size just a count of
> the number of block groups?  If it is always a power of two, and the
> groups per metabg is always a power of two (it is) then they will
> always be even multiples.

What I mean is that if the value in the super block is corrupted and
does not represent the actual flexbg size, the inode allocation will
behave in weird unexpected ways.  Just as we check that the bitmaps are
within the block group range (when not using flexbg), we should
probably sanity check the size of the flexbg as reported in the super
block.

Or do you believe the check is unnecessary?

> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 



-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] [PATCH] Flex_BG ialloc awareness V2.

2007-12-13 Thread Jose R. Santos
On Tue, 11 Dec 2007 16:15:28 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Dec 11, 2007  10:08 -0600, Jose R. Santos wrote:
> > > I'd think being able to avoid the divide for every inode allocation is 
> > > more
> > > important than 8 bits in the superblock.
> > 
> > We already avoid the divide since what we store in the sbi IS the bits
> > which are calculated at mount time for each fs.  Base on the other
> > fields in the super block struct, I decided to put explicit size of the
> > flexbg in the super block.  The kernel code can decide how best to use
> > that number which in this case its used to calculate the number of bits
> > in order to avoid doing divides.
> > 
> > So this is really a styling issue in how to record data in the super
> > block.  The only technical issue with this is whether it's important to
> > save those extra 8 bits in the super block struct.
> 
> Well, if it is stored in the superblock as a non-power-of-two value, then
> there always exists the possibility that it is set incorrectly (maybe by
> a version of mke2fs that doesn't verify this) and the code will not do the
> right thing.  Storing it in bits (as is done with e.g. s_log_block_size and
> s_log_frag_size) ensures there is no possibility of a value that isn't a
> power-of-two.

While I don't necessarily buy the mke2fs example (the only patch that
sets this already checks for power-of-two), you are right about the
possibility of it being set incorrectly.  I will change it to store the
bits in the next release which I'll do after I fix the resize2fs issues
since this will require changes to e2fsprogs as well.

Now, storing the bits only guarantees that the flexbg size is always a
power-of-two and does not guarantee that the super block flexbg size
represents the actual meta-data grouping on disk.  For this we need to
verify that the bitmap offsets match what the super block reports.  It
may be an unlikely scenario, but it may be worth it to check this as
well at mount time.

> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 


-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] [PATCH] Flex_BG ialloc awareness V2.

2007-12-11 Thread Jose R. Santos
On Tue, 11 Dec 2007 04:00:33 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Dec 07, 2007  09:52 -0600, Jose R. Santos wrote:
> > Andreas Dilger <[EMAIL PROTECTED]> wrote:
> > > There is no particular reason that this ratio needs to be "*100", it could
> > > just as easily be a fraction of 256 and make the multiply into a shift.
> > > The free_block_ratio would be 26 in that case.
> > 
> > The idea here is to reserve 10% (free_block_ratio) of free blocks in a
> > flexbg for allocation of new files and expansion of existing one.  The
> > "*100" make the math here easy but this still something that need to be
> > tune further.  I'm sure we can do this in a series of shifts, just
> > haven't spent the time thinking of a clever way to do this.
> 
> This is a common misconception for code to have 10% mean 10 / 100.  It
> is just as good to have 26/256

I understand that part, but my point is that changing the multiply to
256 doesn't do anything to eliminate the divide by blocks_per_flex.
Given that it is more common to have an arch with no divide instruction
than one with no multiply, it seems more important to take care of the
divide by blocks_per_flex rather than the multiply by 100.

We could store the blocks_per_flex_bits in the sbi to do this but the
last flexbg is not guaranteed to be the same size as the other flexbgs
so it needs to be treated differently.

Hum...  Now that I think of it, the last flexbg is not treated
differently on the current patch either.  Looks like I found a bug. :)

> > > > @@ -622,7 +631,9 @@ struct ext4_super_block {
> > > > __le16  s_mmp_interval; /* # seconds to wait in MMP 
> > > > checking */
> > > > __le64  s_mmp_block;/* Block for multi-mount 
> > > > protection */
> > > > __le32  s_raid_stripe_width;/* blocks on all data disks 
> > > > (N*stride)*/
> > > > -   __u32   s_reserved[163];/* Padding to the end of the 
> > > > block */
> > > > +   __le16  s_flex_bg_size; /* FLEX_BG group size */
> > > 
> > > Shouldn't this be "s_flex_bg_bits"?
> > 
> > I debated whether to store this as the s_flex_bg_size and calculate the
> > bits during the filesystem mount time or just stored the bit in the
> > super block to begging with.  The reason I stored the size is that it
> > seemed more in line with the other fields in the super block.  I don't
> > mind either way since this is more of a style issue, although saving an
> > extra 8bits in the super block may be good enough reason to change it. 
> 
> I'd think being able to avoid the divide for every inode allocation is more
> important than 8 bits in the superblock.

We already avoid the divide since what we store in the sbi IS the bits
which are calculated at mount time for each fs.  Based on the other
fields in the super block struct, I decided to put the explicit size of the
flexbg in the super block.  The kernel code can decide how best to use
that number, which in this case is used to calculate the number of bits
in order to avoid doing divides.

So this is really a styling issue in how to record data in the super
block.  The only technical issue with this is whether it's important to
save those extra 8 bits in the super block struct.
 
> > > My preference would be to have "if (EXT2_HAS_INCOMPAT...) { ... } else {"
> > > (i.e. add { } for the first part) since there are { } on the second part,
> > > and it is just easier to read.
> > 
> > Mine too, but checkpatch complained about this. :)
> 
> Time to fix checkpatch it would seem.
> 
> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 



-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC] [PATCH] Flex_BG ialloc awareness V2.

2007-12-07 Thread Jose R. Santos
On Fri, 7 Dec 2007 03:14:28 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Dec 06, 2007  16:10 -0600, Jose R. Santos wrote:
> > @@ -600,6 +600,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct 
> > super_block *sb,
> > struct ext4_sb_info *sbi;
> > int err = 0, ret;
> > ext4_grpblk_t group_freed;
> > +   ext4_group_t flex_group;
> >  
> > *pdquot_freed_blocks = 0;
> > sbi = EXT4_SB(sb);
> > @@ -745,6 +746,14 @@ do_more:
> > spin_unlock(sb_bgl_lock(sbi, block_group));
> > percpu_counter_add(&sbi->s_freeblocks_counter, count);
> >  
> > +   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
> > +   sbi->s_groups_per_flex_shift) {
> > +   flex_group = ext4_flex_group(sbi, block_group);
> > +   spin_lock(sb_bgl_lock(sbi, flex_group));
> > +   sbi->s_flex_groups[flex_group].free_blocks += count;
> > +   spin_unlock(sb_bgl_lock(sbi, flex_group));
> > +   }
> 
> In general, I prefer to keep variables in as local a scope as possible.
> In this case, flex_group could be declared inside the "if (EXT4_HAS_INCOMPAT"
> check.

Ok.

> > +#define free_block_ratio 10
> > +
> > +int find_group_flex(struct super_block *sb, struct inode *parent)
> > +{
> > +   n_fbg_groups = (sbi->s_groups_count + flex_size - 1) / flex_size;
> > +   best_flex = parent_fbg_group;
> > +
> > +find_close_to_parent:
> > +   flex_freeb_ratio = 
> > flex_group[best_flex].free_blocks*100/blocks_per_flex;
> 
> There is no particular reason that this ratio needs to be "*100", it could
> just as easily be a fraction of 256 and make the multiply into a shift.
> The free_block_ratio would be 26 in that case.

The idea here is to reserve 10% (free_block_ratio) of free blocks in a
flexbg for allocation of new files and expansion of existing ones.  The
"*100" makes the math here easy but this is still something that needs to be
tuned further.  I'm sure we can do this in a series of shifts, I just
haven't spent the time thinking of a clever way to do this.

Although, given all the multiplies, divides, and endian changes that occur
while using Orlov, I'm not so concerned about this right now.

> > +   for (i = 0; i < n_fbg_groups; i++) {
> > +   if (i == parent_fbg_group || i == parent_fbg_group - 1)
> > +   continue;
> 
> It seems this scans flex groups the way we used to scan groups?

No.  It does something slightly different: the scan does not start from
the parent group forward.  This helps compress data as much as possible
on the disk and helps avoid large seeks.  Reclaiming as many used
groups as possible will also help uninitialized block groups by avoiding
using groups when there is perfectly good unused space at the beginning
of the disk.  Currently the search starts at the first flexbg but for
very large filesystems, this should be tuned to start at a location
closer to the parent's flex group.  This is another area where the patch
needs more tuning, but I was hoping people would give this patch a
try to see what deficiencies they find before going into a lengthy disk
testing/tuning cycle.

> > +found_flexbg:
> > +   for (i = best_flex * flex_size; i < ngroups &&
> > +i < (best_flex + 1) * flex_size; i++) {
> 
> And now that we've found a suitable flex group, we need to find which
> block group therein has some free inodes...

Yes, but we treat all inode tables in a flex group as one giant table
to improve locality and reduce initialization of inode tables to
improve fsck time.

> 
> > +static int ext4_fill_flex_info(struct super_block *sb)
> > +{
> 
> It still seems desirable to have a single per-group array instead of

?
 
> > @@ -622,7 +631,9 @@ struct ext4_super_block {
> > __le16  s_mmp_interval; /* # seconds to wait in MMP checking */
> > __le64  s_mmp_block;/* Block for multi-mount protection */
> > __le32  s_raid_stripe_width;/* blocks on all data disks (N*stride)*/
> > -   __u32   s_reserved[163];/* Padding to the end of the block */
> > +   __le16  s_flex_bg_size; /* FLEX_BG group size */
> 
> Shouldn't this be "s_flex_bg_bits"?

I debated whether to store this as the s_flex_bg_size and calculate the
bits during the filesystem mount time or just store the bits in the
super block to begin with.  The reason I stored the size is that it
seemed more in line with the other fields in the super block.  I don't
mind either way since this is more of a style issue, although saving an
extra 8bits in the 

[RFC] [PATCH] Flex_BG ialloc awareness V2.

2007-12-06 Thread Jose R. Santos
Hi folks,

New version of the Flex_BG ialloc allocation patch.

Changes from the last version:

- Size of the FLEX_BG is now written to the super block at mke2fs time
  instead of calculating at mount time (testing patch for e2fsprogs'
  next branch attached).

- Rename a lot of the confusing "meta" terms as suggested by Andreas.

- Use the Orlov if the FLEX_BG size is 0.

- Use shift instead of divide in ext4_meta_group() as suggested by
  Andreas.

- Use sb_bgl_lock() instead of one spinlock per FLEX_BG as suggested by
  Andreas.  (Needs more perf testing)

- Remove some dead/prototype code.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b102b0e..7ef9787 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -600,6 +600,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct 
super_block *sb,
struct ext4_sb_info *sbi;
int err = 0, ret;
ext4_grpblk_t group_freed;
+   ext4_group_t flex_group;
 
*pdquot_freed_blocks = 0;
sbi = EXT4_SB(sb);
@@ -745,6 +746,14 @@ do_more:
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+   sbi->s_groups_per_flex_shift) {
+   flex_group = ext4_flex_group(sbi, block_group);
+   spin_lock(sb_bgl_lock(sbi, flex_group));
+   sbi->s_flex_groups[flex_group].free_blocks += count;
+   spin_unlock(sb_bgl_lock(sbi, flex_group));
+   }
+
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
err = ext4_journal_dirty_metadata(handle, bitmap_bh);
@@ -1610,6 +1619,7 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct 
inode *inode,
unsigned short windowsz = 0;
ext4_group_t ngroups;
unsigned long num = *count;
+   ext4_group_t flex_group;
 
*errp = -ENOSPC;
sb = inode->i_sb;
@@ -1815,6 +1825,14 @@ allocated:
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
 
+   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+   sbi->s_groups_per_flex_shift) {
+   flex_group = ext4_flex_group(sbi, group_no);
+   spin_lock(sb_bgl_lock(sbi, flex_group));
+   sbi->s_flex_groups[flex_group].free_blocks -= num;
+   spin_unlock(sb_bgl_lock(sbi, flex_group));
+   }
+
BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
err = ext4_journal_dirty_metadata(handle, gdp_bh);
if (!fatal)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 17b5df1..35ab8ff 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -158,6 +158,7 @@ void ext4_free_inode (handle_t *handle, struct inode * 
inode)
struct ext4_super_block * es;
struct ext4_sb_info *sbi;
int fatal = 0, err;
+   ext4_group_t flex_group;
 
if (atomic_read(&inode->i_count) > 1) {
printk ("ext4_free_inode: inode has count=%d\n",
@@ -235,6 +236,13 @@ void ext4_free_inode (handle_t *handle, struct inode * 
inode)
if (is_directory)
percpu_counter_dec(&sbi->s_dirs_counter);
 
+   if (EXT4_HAS_INCOMPAT_FEATURE(sb, 
EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+   sbi->s_groups_per_flex_shift) {
+   flex_group = ext4_flex_group(sbi, block_group);
+   spin_lock(sb_bgl_lock(sbi, flex_group));
+   sbi->s_flex_groups[flex_group].free_inodes++;
+   spin_unlock(sb_bgl_lock(sbi, flex_group));
+   }
}
BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, bh2);
@@ -289,6 +297,71 @@ static int find_group_dir(struct super_block *sb, struct 
inode *parent,
return ret;
 }
 
+#define free_block_ratio 10
+
+int find_group_flex(struct super_block *sb, struct inode *parent)
+{
+   struct ext4_sb_info *sbi = EXT4_SB(sb);
+   struct ext4_group_desc *desc;
+   struct buffer_head *bh;
+   struct flex_groups *flex_group = sbi->s_flex_groups;
+   ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
+   ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
+   ext4_group_t ngroups = sbi->s_groups_count;
+   int flex_size = ext4_flex_bg_size(sbi);
+   ext4_group_t best_flex = -1;
+   ext4_group_t best_group = -1;
+   int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
+   int flex_freeb_ratio;
+   ext4_group_t n

Re: [RFC] Flex_BG ialloc awareness.

2007-12-04 Thread Jose R. Santos
On Mon, 3 Dec 2007 13:42:47 -0700
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Dec 03, 2007  13:05 -0600, Jose R. Santos wrote:
> > @@ -600,6 +600,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct 
> > super_block *sb,
> > ext4_grpblk_t group_freed;
> > +   ext4_group_t meta_group;
> 
> Please do not call these meta_groups.  This already means something very
> specific (i.e. desc_per_block groups) and using it for FLEX_BG is confusing.
> One possibly desirable relation is if the FLEX_BG count is some integer or
> power-of-two multiple of the metabg count.  That would allow the FLEX_BG
> code to share the same in-memory group struct as the mballoc code and save
> on some memory overhead.

Yes, need to clean the naming on some of these.  I also need to look
into the mballoc code to see if there is anything I can reuse.

> > +   meta_group = ext4_meta_group(sbi, block_group);
> > +   
> > spin_lock(&sbi->s_meta_groups[meta_group].meta_group_lock);
> > +   sbi->s_meta_groups[meta_group].free_inodes++;
> > +   if (is_directory)
> > +   sbi->s_meta_groups[meta_group].num_dirs--;
> > +   
> > spin_unlock(&sbi->s_meta_groups[meta_group].meta_group_lock);
> 
> This can be as many as hundreds or thousands of spin locks.  Why not use
> the same hashed locking code as the group descriptors themselves?
> 
>   spin_lock(sb_bgl_lock(sbi, meta_group));
>   spin_unlock(sb_bgl_lock(sbi, meta_group));
> 
> This scales with the number of CPUs and chance of contention is very low.

Excellent.  I was thinking that one spinlock per flex_bg was overkill
as well but I did not know the existence of blockgroup_lock.h.

> > +int find_group_meta(struct super_block *sb, struct inode *parent)
> > +{
> > +   ext4_group_t parent_mgroup = parent_group / sbi->s_groups_per_meta;
> 
> This could use ext4_meta_group(sbi, parent_group)?

Yes, thanks for catching.
 
> > +static inline ext4_group_t ext4_meta_group(struct ext4_sb_info *sbi,
> > +ext4_group_t block_group)
> > +{
> > +   return block_group/sbi->s_groups_per_meta;
> > +}
> 
> It would be preferable to limit s_groups_per_meta to be a power-of-two
> so that this can become a shift instead of a divide.

Seems like I always fall into the same trap.  I'll change this.

> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
> 

Thanks.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC] Flex_BG ialloc awareness.

2007-12-03 Thread Jose R. Santos
Hi folks,

I'm preparing to clean up the FLEX_BG ialloc patch for inclusion into
the patch queue and decided that I might as well send the current version
of the patch to the mailing list to get any early feedback from folks.

This patch mostly controls the way inodes are allocated in order to make
ialloc aware of flex_bg block group grouping.  It achieves this by
bypassing the Orlov allocator when the use of FLEX_BG is detected.
Since the impact on the block allocator is minimal, this patch should
have little or no effect on other block allocation algorithms. By
controlling the inode allocation, I can basically control where the
initial search for a new block begins and thus indirectly manipulate the
block allocator.

This allocator favors data and meta-data locality so the disk will
gradually be filled from beginning to end, unlike the Orlov algorithm that
places a new inode anywhere on the disk where more free inodes and
blocks are available.  Since the group of inode tables within one
flex_bg is treated as one giant inode table, uninitialized block groups
would not need to partially initialize as many inode tables as with Orlov,
which would help fsck time as the filesystem usage goes up.

I've done testing on both SCSI and Fiber Channel attached storage
sub-systems and the performance seems to be mostly equal or better than
with the regular meta-data allocation.  Multi-threaded results on Fiber
Channel disk are very impressive though...

FFSB multi-threaded IO workload with no flexbg, flexbg with 64 groups
and flexbg with 128 groups:

ext4-normal-profile-crt-delete-append-read-small-threads_64
read : 89490 ops (5.456950%)
write : 130001 ops (7.927243%)
create : 1116427 ops (68.077847%)
append : 217196 ops (13.244248%)
delete : 86813 ops (5.293711%)
5437.50 Transactions per Second
5.5% User   Time
599.9% System Time
605.4% CPU Utilization

ext4-flexbg-64-profile-crt-delete-append-read-small-threads_64
read : 124273 ops (5.462826%)
write : 181257 ops (7.967743%)
create : 1547156 ops (68.010295%)
append : 301872 ops (13.269770%)
delete : 120327 ops (5.289366%)
7559.41 Transactions per Second
7.5% User   Time
281.7% System Time
289.2% CPU Utilization

ext4-flexbg-128-profile-crt-delete-append-read-small-threads_64
read : 115689 ops (5.460529%)
write : 168328 ops (7.945093%)
create : 1441175 ops (68.023558%)
append : 280600 ops (13.244339%)
delete : 112849 ops (5.326481%)
7024.59 Transactions per Second
6.6% User   Time
296.1% System Time
302.7% CPU Utilization

Also tried Compilebench with 300 initial dirs to see if the
improvements seen in FFSB could also be seen in other benchmarks and
the results are encouraging.

Normal:
intial create total runs 50 avg 55.74 MB/s (user 1.87s sys 3.68s)
create total runs 5 avg 60.24 MB/s (user 1.91s sys 3.37s)
patch total runs 4 avg 19.33 MB/s (user 0.77s sys 3.07s)
compile total runs 7 avg 110.79 MB/s (user 0.39s sys 4.12s)
clean total runs 4 avg 633.64 MB/s (user 0.08s sys 0.64s)
read tree total runs 2 avg 11.26 MB/s (user 1.92s sys 2.70s)
read compiled tree total runs 1 avg 30.98 MB/s (user 2.44s sys 4.80s)
delete tree total runs 2 avg 4.05 seconds (user 1.31s sys 2.78s)
no runs for delete compiled tree
stat tree total runs 4 avg 3.78 seconds (user 1.35s sys 0.89s)
stat compiled tree total runs 1 avg 4.15 seconds (user 1.45s sys 1.07s)

FLEX_BG 64 groups:
intial create total runs 50 avg 71.97 MB/s (user 1.89s sys 3.82s)
create total runs 5 avg 57.35 MB/s (user 1.89s sys 4.85s)
patch total runs 4 avg 19.89 MB/s (user 0.76s sys 2.99s)
compile total runs 7 avg 116.79 MB/s (user 0.39s sys 4.17s)
clean total runs 4 avg 736.80 MB/s (user 0.07s sys 0.62s)
read tree total runs 2 avg 15.53 MB/s (user 1.97s sys 2.80s)
read compiled tree total runs 1 avg 31.59 MB/s (user 2.48s sys 5.02s)
delete tree total runs 2 avg 3.15 seconds (user 1.18s sys 2.50s)
no runs for delete compiled tree
stat tree total runs 4 avg 2.43 seconds (user 1.25s sys 0.90s)
stat compiled tree total runs 1 avg 3.13 seconds (user 1.48s sys 1.03s)


There are still a couple of things that need fixing in the patch but I
would like to get some opinions as well.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b102b0e..4ece12d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -600,6 +600,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct 
super_block *sb,
struct ext4_sb_info *sbi;
int err = 0, ret;
ext4_grpblk_t group_freed;
+   ext4_group_t meta_group;
 
*pdquot_freed_blocks = 0;
sbi = EXT4_SB(sb);
@@ -745,6 +746,11 @@ do_more:
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
+   meta_group = ext4_meta_group(sbi, block_group);
+   spin_lock(&sbi->s_meta_groups[meta_group].meta_group_lock);
+   sbi->s_meta_groups[meta_group].free_blocks += count;
+   spin_unlock(&sbi->s_meta_groups[meta_group

Re: [PATCH 4/4][e2fsprogs] New bitmap and inode table allocation for FLEX_BG

2007-11-05 Thread Jose R. Santos
On Sat, 3 Nov 2007 20:52:08 -0400
Theodore Tso <[EMAIL PROTECTED]> wrote:

> On Mon, Aug 13, 2007 at 11:33:14PM -0500, Jose R. Santos wrote:
> > +   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super, 
> > +  EXT4_FEATURE_INCOMPAT_FLEX_BG)) 
> > +   ext2fs_allocate_flex_groups(fs);
> > +   
> > +   else {
> > +   for (i = 0; i < fs->group_desc_count; i++) {
> > +   retval = ext2fs_allocate_group_table(fs, i, 
> > fs->block_map);
> > +   if (retval)
> > +   return retval;
> > +   }
> 
> The code to handle flex groups needs to be moved into
> ext2fs_allocate_group_table(), and not put in
> ext2fs_allocate_tables(), since resize2fs calls
> ext2fs_allocate_group_table(), and we want resize2fs to do the right
> thing for filesystems that have both the FLEX_BG and META_BG flags
> set.  Speaking of which, we need to fix the kernel on-line resizing
> code to allocate new blocks for filesystem metadata blocks using the
> new placement algorithm for FLEX_BG && META_BG filesystems.
> 
> I'll fix this up for the 'pu' branch of e2fsprogs, as well as making
> ext2fs_allocate_flex_groups static, but I'm not going to graduate this
> to the 'next' branch just yet, since it needs a bit more testing,
> specifically with resize2fs.

Yes, this code is not ready for the 'next' branch.  This was mostly
intended as an RFC and for testing.  Resizing is not handled at all in
this code and fsck needs more testing.

>   - Ted



-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4][e2fsprogs] Relax group descriptor checking.

2007-11-05 Thread Jose R. Santos
On Sat, 3 Nov 2007 19:36:09 -0400
Theodore Tso <[EMAIL PROTECTED]> wrote:

> On Mon, Aug 13, 2007 at 11:33:03PM -0500, Jose R. Santos wrote:
> > From: Jose R. Santos <[EMAIL PROTECTED]>
> > 
> > Relax group descriptor checking.
> 
> This patch should really be before patch #2 in the series (add the
> ability to handle the new feature before adding the ability to add in
> mke2fs).  (Actually, I would have split up #2 into one patch which
> added the libe2p handling for the feature, then added the change to
> e2fsck, and then added the mke2fs changes, but that's just me
> quibbling.)

Sounds reasonable.  I'll take care of patch ordering better next time.

> > @@ -578,8 +580,16 @@ void check_super_block(e2fsck_t ctx)
> > for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
> > pctx.group = i;
> >  
> > -   first_block = ext2fs_group_first_block(fs, i);
> > -   last_block = ext2fs_group_last_block(fs, i);
> > +   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
> > +  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
> > +   meta_bg_size = (fs->blocksize / sizeof (struct 
> > ext2_group_desc));
> > +   start_group = (i / meta_bg_size) * meta_bg_size;
> > +   first_block = ext2fs_group_first_block(fs, start_group);
> > +   last_block = ext2fs_group_first_block(fs, start_group + 
> > meta_bg_size);
> 
> This patch requires that the metadata be in located in the metablock
> group descriptor, instead of anywhere in the filesystem, which is what
> we ultimately ended up checking into the kernel.  Being more flexible
> is good (even if that's not the layout we use by default).  I'll fix
> this up using git rebase --interactive and republish the patch in the
> next branch.

Yes, the patch is slightly off and I did send an email pointing to
exactly this, but you had already committed the patch by the time the
email was sent.  This patch was submitted to the mailing list before
the final kernel changes made it into the queue.

>   - Ted

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 13/13][e2fsprogs] Add m_uninit test case.

2007-11-01 Thread Jose R. Santos
On Thu, 1 Nov 2007 06:03:00 -0500
"Jose R. Santos" <[EMAIL PROTECTED]> wrote:
> It seems like a problem on your end since I got the patches from the
> mailing list after sending them.  Here are the patches from the
> archives.
> 
> [PATCH 01/13] http://lists.openwall.net/linux-ext4/2007/10/11/20
> [PATCH 11/13] http://lists.openwall.net/linux-ext4/2007/10/11/29
> [PATCH 12/13] http://lists.openwall.net/linux-ext4/2007/10/11/33

Oops...   There is a slightly updated version of the patch series that
has a couple of fixes.  It also has one additional patch.

http://www.mail-archive.com/linux-ext4@vger.kernel.org/msg03803.html

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 13/13][e2fsprogs] Add m_uninit test case.

2007-11-01 Thread Jose R. Santos
On Thu, 01 Nov 2007 17:58:42 +0800
Coly Li <[EMAIL PROTECTED]> wrote:

> Coly Li wrote:
> > I plan to do some work based on uninit group patch. When I try to patch the 
> > patch into kernel, I
>   
>  sorry
> it's typo, I mean into e2fsprogs...
> > find the sequence number of patch is not continuous. Here is what I find 
> > for e2fsprogs patch,
> > 
> > [PATCH 00/13]
> > [PATCH 02/13]
> > [PATCH 03/13]
> > [PATCH 04/13]
> > [PATCH 05/13]
> > [PATCH 06/13]
> > [PATCH 07/13]
> > [PATCH 08/13]
> > [PATCH 09/13]
> > [PATCH 10/13]
> > [PATCH 13/13]
> > 
> > I can not find 01, 11 and 12. Do I miss them, or where can I find these 3 
> > patch ?
> > 
> > Thanks.

It seems like a problem on your end since I got the patches from the
mailing list after sending them.  Here are the patches from the
archives.

[PATCH 01/13] http://lists.openwall.net/linux-ext4/2007/10/11/20
[PATCH 11/13] http://lists.openwall.net/linux-ext4/2007/10/11/29
[PATCH 12/13] http://lists.openwall.net/linux-ext4/2007/10/11/33


-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: compilebench numbers for ext4

2007-10-25 Thread Jose R. Santos
On Thu, 25 Oct 2007 14:43:55 -0400
Chris Mason <[EMAIL PROTECTED]> wrote:
> > 
> > 2) You mentioned that one of the goals of the benchmark is to measure
> > locality during directory aging, but the workloads seems too well
> > order to truly age the filesystem.  At least that's what I can gather
> > from the output the benchmark spits out.  It may be that Im not
> > understanding the relationship between INITIAL_DIRS and RUNS, but the
> > workload seem to been localized to do operations on a single dir at a
> > time.  Just wondering is this is truly stressing allocation algorithms
> > in a significant or realistic way.
> 
> A good question.  compilebench has two modes, and the default is better
> at aging then the run I graphed on ext4.  compilebench isn't trying to
> fragment individual files, but it is instead trying to fragment
> locality, and lower the overall performance of a directory tree.
> 
> In the default run, the patch, clean, and compile operations end up
> changing around groups of files in a somewhat random fashion (at least
> from the FS point of view).  But, it is still a workload where a good
> FS should be able to maintain locality and provide consistent results
> over time.
> 
> The ext4 numbers I sent here are from compilebench --makej, which is a
> shorter and less complex run.  It has a few simple phases:
> 
> * create some number of kernel trees sequentially
> * write new files into those trees in random order
> * read a three of the trees
> * delete all the trees
> 
> It is a very basic test that can give you a picture of directory
> layout, writeback performance and overall locality.

Thanks.  This clears up a couple of things and I think I now follow the
direction you're heading in with this workload.

> > 
> > I really want to use seekwatcher to test some of the stuff that I'm
> > doing for flex_bg feature but it barfs on me in my test machine.
> > 
> > running :sleep 10:
> > done running sleep 10
> > Device: /dev/sdh
> >   Total: 0 events (dropped 0), 1368 KiB data
> > blktrace done
> > Traceback (most recent call last):
> >   File "/usr/bin/seekwatcher", line 534, in ?
> > add_range(hist, step, start, size)
> >   File "/usr/bin/seekwatcher", line 522, in add_range
> > val = hist[slot]
> > IndexError: list index out of range
> 
> I don't think you have any events in the trace.  Try this instead:
> 
> echo 3 > /proc/sys/vm/drop_caches
> seekwatcher -t find-trace -d /dev/ -p 'find /usr/local -type f'

Nope, get the same error.  There does seem to be data recorded in the
trace files and iostat does show activity on the disk.

toolssf2 ~ # echo 3 > /proc/sys/vm/drop_caches
toolssf2 ~ # seekwatcher -t find-trace -d /dev/sdb3 -p 'find /root -type f 
>/dev/null'
running :find /root -type f >/dev/null:
done running find /root -type f >/dev/null
Device: /dev/sdb3
  CPU  0:0 events,  303 KiB data
  CPU  1:0 events,  262 KiB data
  CPU  2:0 events,  205 KiB data
  CPU  3:0 events,  302 KiB data
  CPU  4:0 events,  240 KiB data
  CPU  5:0 events,  281 KiB data
  CPU  6:0 events,  191 KiB data
  CPU  7:0 events,  281 KiB data
  Total: 0 events (dropped 0), 2061 KiB data
blktrace done
Traceback (most recent call last):
  File "/usr/bin/seekwatcher", line 534, in ?
add_range(hist, step, start, size)
  File "/usr/bin/seekwatcher", line 522, in add_range
val = hist[slot]
IndexError: list index out of range

> > This is running on a PPC64/gentoo combination.  Dont know if this
> > means anything to you.  I have a very basic algorithm for to take
> > advantage block group metadata grouping and want be able to better
> > visualize how different IO patterns take advantage or are hurt by the
> > feature.
> 
> I wanted to benchmark flexbg too, but couldn't quite figure out the
> correct patch combination ;)

I'll attach e2fsprogs and kernel patches, but do realize that these are
experimental patches that I'm using to test what layout would work
best.  Don't take them too seriously as they are largely incomplete.

Currently trying to come up with workloads to test this and other
changes with.  I am warming up to yours :)

To create a filesystem with the feature just do:
mke2fs -j -I 256 -O flex_bg /dev/xxx

Currently the number of block groups whose metadata is grouped together
is EXT4_DESC_PER_BLOCK(), which matches the meta_bg feature. This turns
out to be 128 block groups.  This may (probably will) change in the
future, but it gives a general idea of what benefits can be had with
large groupings of metadata.

On compilebench it seems to show a 10x improvement on "create dir"
since I'm currently testing on a SCSI disk with write cache disabled.  I
would think the improvements would be a lot less noticeable on a SATA
drive since those usually ship with w

Re: compilebench numbers for ext4

2007-10-25 Thread Jose R. Santos
On Mon, 22 Oct 2007 19:31:04 -0400
Chris Mason <[EMAIL PROTECTED]> wrote:

> Hello everyone,
> 
> I recently posted some performance numbers for Btrfs with different
> blocksizes, and to help establish a baseline I did comparisons with
> Ext3.
> 
> The graphs, numbers and a basic description of compilebench are here:
> 
> http://oss.oracle.com/~mason/blocksizes/

I've been playing a bit with the workload and I have a couple of
comments.

1) I find the averaging of results at the end of the run misleading
unless you run a high number of directories.  A single very good result
due to page caching effects seems to skew the final results output.
Have you considered providing output of the standard deviation of the
data points as well, in order to show how widely the results are spread?

2) You mentioned that one of the goals of the benchmark is to measure
locality during directory aging, but the workload seems too well ordered
to truly age the filesystem.  At least that's what I can gather from
the output the benchmark spits out.  It may be that I'm not
understanding the relationship between INITIAL_DIRS and RUNS, but the
workload seems to be localized to doing operations on a single dir at a
time.  Just wondering if this is truly stressing allocation algorithms
in a significant or realistic way.

Still playing and reading the code so I hope to have a clearer
understanding of how it stresses the filesystem.  This would be a hard
one to simulate in ffsb (my favorite workload) due to the locality in
the way the dataset is accessed.  Would be interesting to let ffsb age
the filesystem and then run compilebench to see how it does on an
unclean filesystem with lots of holes.

> Ext3 easily wins the read phase, but scores poorly while creating files
> and deleting them.  Since ext3 is winning the read phase, we can assume
> the file layout is fairly good.  I think most of the problems during the
> write phase are caused by pdflush doing metadata writeback.  The file
> data and metadata are written separately, and so we end up seeking
> between things that are actually close together.

If I understand how compilebench works, directories would be allocated
within one or two block group boundaries so the data and metadata
would be in very close proximity.  I assume that doing random lookups
through the entire file set would show some weakness in the ext3
metadata layout.

> Andreas asked me to give ext4 a try, so I grabbed the patch queue from
> Friday along with the latest Linus kernel.  The FS was created with:
> 
> mkfs.ext3 -I 256 /dev/
> mount -o delalloc,mballoc,data=ordered -t ext4dev /dev/
> 
> I did expect delayed allocation to help the write phases of
> compilebench, especially the parts where it writes out .o files in
> random order (basically writing medium sized files all over the
> directory tree).  But, every phase except reads showed huge
> improvements.
> 
> http://oss.oracle.com/~mason/compilebench/ext4/ext-create-compare.png
> http://oss.oracle.com/~mason/compilebench/ext4/ext-compile-compare.png
> http://oss.oracle.com/~mason/compilebench/ext4/ext-read-compare.png
> http://oss.oracle.com/~mason/compilebench/ext4/ext-rm-compare.png

I really want to use seekwatcher to test some of the stuff that I'm
doing for the flex_bg feature but it barfs on me on my test machine.

running :sleep 10:
done running sleep 10
Device: /dev/sdh
  CPU  0:0 events,  121 KiB data
  CPU  1:0 events,  231 KiB data
  CPU  2:0 events,  121 KiB data
  CPU  3:0 events,  208 KiB data
  CPU  4:0 events,  137 KiB data
  CPU  5:0 events,  213 KiB data
  CPU  6:0 events,  120 KiB data
  CPU  7:0 events,  220 KiB data
  Total: 0 events (dropped 0), 1368 KiB data
blktrace done
Traceback (most recent call last):
  File "/usr/bin/seekwatcher", line 534, in ?
add_range(hist, step, start, size)
  File "/usr/bin/seekwatcher", line 522, in add_range
val = hist[slot]
IndexError: list index out of range

This is running on a PPC64/gentoo combination.  Don't know if this means
anything to you.  I have a very basic algorithm to take advantage of
block group metadata grouping and want to be able to better visualize how
different IO patterns take advantage of or are hurt by the feature.

> To match the ext4 numbers with Btrfs, I'd probably have to turn off data
> checksumming...
> 
> But oddly enough I saw very bad ext4 read throughput even when reading
> a single kernel tree (outside of compilebench).  The time to read the
> tree was almost 2x ext3.  Have others seen similar problems?
> 
> I think the ext4 delete times are so much better than ext3 because this
> is a single threaded test.  delayed allocation is able to get
> everything into a few extents, and these all end up in the inode.  So,
> the delete phase only needs to se

Re: compilebench numbers for ext4

2007-10-25 Thread Jose R. Santos
On Mon, 22 Oct 2007 19:31:04 -0400
Chris Mason <[EMAIL PROTECTED]> wrote:

> Hello everyone,
> 
> I recently posted some performance numbers for Btrfs with different
> blocksizes, and to help establish a baseline I did comparisons with
> Ext3.
> 
> The graphs, numbers and a basic description of compilebench are here:
> 
> http://oss.oracle.com/~mason/blocksizes/

I've been playing a bit with the workload and I have a couple of
comments.

1) I find the averaging of results at the end of the run misleading
unless you run a high number of directories.  A single very good result
due to page caching effects seems to skew the final results output.
Have you considered providing output of the standard deviation of the
data points as well, in order to show how widely the results are spread?

2) You mentioned that one of the goals of the benchmark is to measure
locality during directory aging, but the workload seems too well ordered
to truly age the filesystem.  At least that's what I can gather from
the output the benchmark spits out.  It may be that I'm not
understanding the relationship between INITIAL_DIRS and RUNS, but the
workload seems to be localized to doing operations on a single dir at a
time.  Just wondering if this is truly stressing allocation algorithms
in a significant or realistic way.

Still playing and reading the code so I hope to have a clearer
understanding of how it stresses the filesystem.  This would be a hard
one to simulate in ffsb (my favorite workload) due to the locality in
the way the dataset is accessed.  Would be interesting to let ffsb age
the filesystem and then run compilebench to see how it does on an
unclean filesystem with lots of holes.

> Ext3 easily wins the read phase, but scores poorly while creating files
> and deleting them.  Since ext3 is winning the read phase, we can assume
> the file layout is fairly good.  I think most of the problems during the
> write phase are caused by pdflush doing metadata writeback.  The file
> data and metadata are written separately, and so we end up seeking
> between things that are actually close together.

If I understand how compilebench works, directories would be allocated
within one or two block group boundaries so the data and metadata
would be in very close proximity.  I assume that doing random lookups
through the entire file set would show some weakness in the ext3
metadata layout.

> Andreas asked me to give ext4 a try, so I grabbed the patch queue from
> Friday along with the latest Linus kernel.  The FS was created with:
> 
> mkfs.ext3 -I 256 /dev/
> mount -o delalloc,mballoc,data=ordered -t ext4dev /dev/
> 
> I did expect delayed allocation to help the write phases of
> compilebench, especially the parts where it writes out .o files in
> random order (basically writing medium sized files all over the
> directory tree).  But, every phase except reads showed huge
> improvements.
> 
> http://oss.oracle.com/~mason/compilebench/ext4/ext-create-compare.png
> http://oss.oracle.com/~mason/compilebench/ext4/ext-compile-compare.png
> http://oss.oracle.com/~mason/compilebench/ext4/ext-read-compare.png
> http://oss.oracle.com/~mason/compilebench/ext4/ext-rm-compare.png

I really want to use seekwatcher to test some of the stuff that I'm
doing for the flex_bg feature but it barfs on me on my test machine.

running :sleep 10:
done running sleep 10
Device: /dev/sdh
  CPU  0:0 events,  121 KiB data
  CPU  1:0 events,  231 KiB data
  CPU  2:0 events,  121 KiB data
  CPU  3:0 events,  208 KiB data
  CPU  4:0 events,  137 KiB data
  CPU  5:0 events,  213 KiB data
  CPU  6:0 events,  120 KiB data
  CPU  7:0 events,  220 KiB data
  Total: 0 events (dropped 0), 1368 KiB data
blktrace done
Traceback (most recent call last):
  File "/usr/bin/seekwatcher", line 534, in ?
add_range(hist, step, start, size)
  File "/usr/bin/seekwatcher", line 522, in add_range
val = hist[slot]
IndexError: list index out of range

This is running on a PPC64/gentoo combination.  Don't know if this means
anything to you.  I have a very basic algorithm to take advantage of
block group metadata grouping and want to be able to better visualize how
different IO patterns take advantage of or are hurt by the feature.

> To match the ext4 numbers with Btrfs, I'd probably have to turn off data
> checksumming...
> 
> But oddly enough I saw very bad ext4 read throughput even when reading
> a single kernel tree (outside of compilebench).  The time to read the
> tree was almost 2x ext3.  Have others seen similar problems?
> 
> I think the ext4 delete times are so much better than ext3 because this
> is a single threaded test.  delayed allocation is able to get
> everything into a few extents, and these all end up in the inode.  So,
> the delete phase only needs to se

Re: [PATCH 10/14] Make e2fsck uninit block group aware.

2007-10-22 Thread Jose R. Santos
Hi Ted,

Aneesh was nice enough to explain how to check out the next branch.
This is the only patch that broke, so I'm attaching one that applies
cleanly.

-JRS





uninit_e2fsck_support
Description: Binary data


[PATCH 14/14] Add m_uninit test case.

2007-10-21 Thread Jose R. Santos

Add m_uninit test case.

Add test case to test for uninit block groups.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 tests/m_uninit/expect.1 |  166 +++
 tests/m_uninit/script   |4 +
 2 files changed, 170 insertions(+), 0 deletions(-)

diff --git a/tests/m_uninit/expect.1 b/tests/m_uninit/expect.1
new file mode 100644
index 000..4167ff5
--- /dev/null
+++ b/tests/m_uninit/expect.1
@@ -0,0 +1,166 @@
+Filesystem label=
+OS type: Linux
+Block size=1024 (log=0)
+Fragment size=1024 (log=0)
+32768 inodes, 131072 blocks
+6553 blocks (5.00%) reserved for the super user
+First data block=1
+Maximum filesystem blocks=67371008
+16 block groups
+8192 blocks per group, 8192 fragments per group
+2048 inodes per group
+Superblock backups stored on blocks: 
+   8193, 24577, 40961, 57345, 73729
+
+Writing inode tables: done
+Writing superblocks and filesystem accounting information: done
+
+Filesystem features: resize_inode dir_index filetype sparse_super uninit_groups
+ 
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 11/32768 files (9.1% non-contiguous), 5691/131072 blocks
+Exit status is 0
+
+Filesystem volume name:   
+Last mounted on:  
+Filesystem magic number:  0xEF53
+Filesystem revision #:1 (dynamic)
+Filesystem features:  resize_inode dir_index filetype sparse_super 
uninit_groups
+Default mount options:(none)
+Filesystem state: clean
+Errors behavior:  Continue
+Filesystem OS type:   Linux
+Inode count:  32768
+Block count:  131072
+Reserved block count: 6553
+Free blocks:  125381
+Free inodes:  32757
+First block:  1
+Block size:   1024
+Fragment size:1024
+Reserved GDT blocks:  256
+Blocks per group: 8192
+Fragments per group:  8192
+Inodes per group: 2048
+Inode blocks per group:   256
+Mount count:  0
+Check interval:   15552000 (6 months)
+Reserved blocks uid:  0
+Reserved blocks gid:  0
+First inode:  11
+Inode size:  128
+Default directory hash:   tea
+
+
+Group 0: (Blocks 1-8192)
+  Primary superblock at 1, Group descriptors at 2-2
+  Reserved GDT blocks at 3-258
+  Block bitmap at 259 (+258), Inode bitmap at 260 (+259)
+  Inode table at 261-516 (+260)
+  7662 free blocks, 2037 free inodes, 2 directories, 2037 unused inodes
+  Free blocks: 531-8192
+  Free inodes: 12-2048
+Group 1: (Blocks 8193-16384) [Inode not init]
+  Backup superblock at 8193, Group descriptors at 8194-8194
+  Reserved GDT blocks at 8195-8450
+  Block bitmap at 8451 (+258), Inode bitmap at 8452 (+259)
+  Inode table at 8453-8708 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 8709-16384
+  Free inodes: 
+Group 2: (Blocks 16385-24576) [Inode not init, Block not init]
+  Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
+  Inode table at 16387-16642 (+2)
+  7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 
+  Free inodes: 
+Group 3: (Blocks 24577-32768) [Inode not init]
+  Backup superblock at 24577, Group descriptors at 24578-24578
+  Reserved GDT blocks at 24579-24834
+  Block bitmap at 24835 (+258), Inode bitmap at 24836 (+259)
+  Inode table at 24837-25092 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 25093-32768
+  Free inodes: 
+Group 4: (Blocks 32769-40960) [Inode not init, Block not init]
+  Block bitmap at 32769 (+0), Inode bitmap at 32770 (+1)
+  Inode table at 32771-33026 (+2)
+  7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 
+  Free inodes: 
+Group 5: (Blocks 40961-49152) [Inode not init]
+  Backup superblock at 40961, Group descriptors at 40962-40962
+  Reserved GDT blocks at 40963-41218
+  Block bitmap at 41219 (+258), Inode bitmap at 41220 (+259)
+  Inode table at 41221-41476 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 41477-49152
+  Free inodes: 
+Group 6: (Blocks 49153-57344) [Inode not init, Block not init]
+  Block bitmap at 49153 (+0), Inode bitmap at 49154 (+1)
+  Inode table at 49155-49410 (+2)
+  7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 
+  Free inodes: 
+Group 7: (Blocks 57345-65536) [Inode not init]
+  Backup superblock at 57345, Group descriptors at 57346-57346
+  Reserved GDT blocks at 57347-57602
+  Block bitmap at 57603 (+258), Inode bitmap at 57604 (+259)
+  Inode table at 57605-57860 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 57861-65536
+  Free inodes: 
+Grou

[PATCH 13/14] Add new mm_lazy test case.

2007-10-21 Thread Jose R. Santos

Add new mm_lazy test case.

Add test case for lazy bg feature.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 tests/m_lazy/expect.1 |  166 +
 tests/m_lazy/script   |4 +
 2 files changed, 170 insertions(+), 0 deletions(-)

diff --git a/tests/m_lazy/expect.1 b/tests/m_lazy/expect.1
new file mode 100644
index 000..32ca764
--- /dev/null
+++ b/tests/m_lazy/expect.1
@@ -0,0 +1,166 @@
+Filesystem label=
+OS type: Linux
+Block size=1024 (log=0)
+Fragment size=1024 (log=0)
+32768 inodes, 131072 blocks
+6553 blocks (5.00%) reserved for the super user
+First data block=1
+Maximum filesystem blocks=67371008
+16 block groups
+8192 blocks per group, 8192 fragments per group
+2048 inodes per group
+Superblock backups stored on blocks: 
+   8193, 24577, 40961, 57345, 73729
+
+Writing inode tables: done
+Writing superblocks and filesystem accounting information: done
+
+Filesystem features: resize_inode dir_index lazy_bg filetype sparse_super
+ 
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 28683/32768 files (0.0% non-contiguous), 77097/131072 blocks
+Exit status is 0
+
+Filesystem volume name:   
+Last mounted on:  
+Filesystem magic number:  0xEF53
+Filesystem revision #:1 (dynamic)
+Filesystem features:  resize_inode dir_index lazy_bg filetype sparse_super
+Default mount options:(none)
+Filesystem state: clean
+Errors behavior:  Continue
+Filesystem OS type:   Linux
+Inode count:  32768
+Block count:  131072
+Reserved block count: 6553
+Free blocks:  53975
+Free inodes:  4085
+First block:  1
+Block size:   1024
+Fragment size:1024
+Reserved GDT blocks:  256
+Blocks per group: 8192
+Fragments per group:  8192
+Inodes per group: 2048
+Inode blocks per group:   256
+Mount count:  0
+Check interval:   15552000 (6 months)
+Reserved blocks uid:  0
+Reserved blocks gid:  0
+First inode:  11
+Inode size:  128
+Default directory hash:   tea
+
+
+Group 0: (Blocks 1-8192)
+  Primary superblock at 1, Group descriptors at 2-2
+  Reserved GDT blocks at 3-258
+  Block bitmap at 259 (+258), Inode bitmap at 260 (+259)
+  Inode table at 261-516 (+260)
+  7662 free blocks, 2037 free inodes, 2 directories
+  Free blocks: 531-8192
+  Free inodes: 12-2048
+Group 1: (Blocks 8193-16384) [Inode not init]
+  Backup superblock at 8193, Group descriptors at 8194-8194
+  Reserved GDT blocks at 8195-8450
+  Block bitmap at 8451 (+258), Inode bitmap at 8452 (+259)
+  Inode table at 8453-8708 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 8709-16384
+  Free inodes: 
+Group 2: (Blocks 16385-24576) [Inode not init, Block not init]
+  Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
+  Inode table at 16387-16642 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+  Free inodes: 
+Group 3: (Blocks 24577-32768) [Inode not init]
+  Backup superblock at 24577, Group descriptors at 24578-24578
+  Reserved GDT blocks at 24579-24834
+  Block bitmap at 24835 (+258), Inode bitmap at 24836 (+259)
+  Inode table at 24837-25092 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 25093-32768
+  Free inodes: 
+Group 4: (Blocks 32769-40960) [Inode not init, Block not init]
+  Block bitmap at 32769 (+0), Inode bitmap at 32770 (+1)
+  Inode table at 32771-33026 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+  Free inodes: 
+Group 5: (Blocks 40961-49152) [Inode not init]
+  Backup superblock at 40961, Group descriptors at 40962-40962
+  Reserved GDT blocks at 40963-41218
+  Block bitmap at 41219 (+258), Inode bitmap at 41220 (+259)
+  Inode table at 41221-41476 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 41477-49152
+  Free inodes: 
+Group 6: (Blocks 49153-57344) [Inode not init, Block not init]
+  Block bitmap at 49153 (+0), Inode bitmap at 49154 (+1)
+  Inode table at 49155-49410 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+  Free inodes: 
+Group 7: (Blocks 57345-65536) [Inode not init]
+  Backup superblock at 57345, Group descriptors at 57346-57346
+  Reserved GDT blocks at 57347-57602
+  Block bitmap at 57603 (+258), Inode bitmap at 57604 (+259)
+  Inode table at 57605-57860 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 57861-65536
+  Free inodes: 
+Group 8: (Blocks 65537-73728) [Inode not init, Block not init]
+  Block bitmap at 65537 (+0), Inode bitmap at 65538 (+1)
+  Inode table at 65539-65794 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+ 

[PATCH 12/14] Fix test cases.

2007-10-21 Thread Jose R. Santos

Fix test cases.

Some of the tools' outputs have changed, so this patch fixes what to expect
from the outputs of the f_dupfsblks and m_raid_opt test cases.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 tests/f_dupfsblks/expect.1 |3 ++-
 tests/m_raid_opt/expect.1  |   33 ++---
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/tests/f_dupfsblks/expect.1 b/tests/f_dupfsblks/expect.1
index 661e164..32ce89b 100644
--- a/tests/f_dupfsblks/expect.1
+++ b/tests/f_dupfsblks/expect.1
@@ -44,7 +44,8 @@ Salvage? yes
 Directory inode 12, block 3, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (12) has deleted/unused inode 32.  Clear? yes
+Entry '' in ??? (12) has a zero-length name.
+Clear? yes
 
 Directory inode 12, block 4, offset 100: directory corrupted
 Salvage? yes
diff --git a/tests/m_raid_opt/expect.1 b/tests/m_raid_opt/expect.1
index 44c5b46..f5abc37 100644
--- a/tests/m_raid_opt/expect.1
+++ b/tests/m_raid_opt/expect.1
@@ -46,57 +46,68 @@ Setting filetype for entry '..' in ??? (11) to 2.
 Directory inode 11, block 1, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1063.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 2, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1064.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 3, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1065.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 4, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1066.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 5, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1067.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 6, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1068.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 7, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1069.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 8, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1070.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 9, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1071.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 10, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1072.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 11, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1073.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Pass 3: Checking directory connectivity
 '..' in / (2) is  (0), should be / (2).
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/14] Update uninit block group documentation for some of the utilities.

2007-10-21 Thread Jose R. Santos

Update uninit block group documentation for some of the utilities.

Updates the documentation man pages for mke2fs(8) and tune2fs(8).

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 misc/mke2fs.8.in  |9 -
 misc/tune2fs.8.in |   12 ++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/misc/mke2fs.8.in b/misc/mke2fs.8.in
index a3dc4a1..171df5b 100644
--- a/misc/mke2fs.8.in
+++ b/misc/mke2fs.8.in
@@ -210,7 +210,7 @@ for the filesystem.  (For administrators who are creating
 filesystems on RAID arrays, it is preferable to use the
 .I stride
 RAID parameter as part of the
-.B \-R
+.B \-E
 option rather than manipulating the number of blocks per group.)  
 This option is generally used by developers who
 are developing test cases.  
@@ -406,6 +406,13 @@ Store file type information in directory entries.
 .TP
 .B has_journal
 Create an ext3 journal (as if using the
+.TP
+.B uninit_groups
+Create a filesystem without initializing all of the groups.  This speeds
+up filesystem creation time noticeably, and can also reduce
+.BR e2fsck time
+dramatically.  This feature causes the filesystem to be read-only in
+older kernels and is not supported in most Linux kernels; use with caution.
 .B \-j
 option).
 @[EMAIL PROTECTED]
diff --git a/misc/tune2fs.8.in b/misc/tune2fs.8.in
index 2e617db..5ab1bd0 100644
--- a/misc/tune2fs.8.in
+++ b/misc/tune2fs.8.in
@@ -392,10 +392,16 @@ option.
 .TP
 .B sparse_super
 Limit the number of backup superblocks to save space on large filesystems.
+.TP
+.B uninit_groups
+Allow the kernel to initialize bitmaps and inode tables and keep a high
+watermark for the unused inodes in a filesystem, to reduce
+.BR e2fsck (8)
+time.
 .RE
 .IP
 After setting or clearing 
-.B sparse_super
+.BR sparse_super , " uninit_groups" ,
 and 
 .B filetype 
 filesystem features,
@@ -414,7 +420,9 @@ can be run to convert existing directories to the hashed 
B-tree format.
 Linux kernels before 2.0.39 and many 2.1 series kernels do not support
 the filesystems that use any of these features.
 Enabling certain filesystem features may prevent the filesystem from
-being mounted by kernels which do not support those features.
+being mounted by kernels which do not support those features.  The
+.B uninit_groups
+feature is not yet supported by any released kernel.
 .TP
 .BI \-r " reserved-blocks-count"
 Set the number of reserved filesystem blocks.
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/14] Make e2fsck uninit block group aware.

2007-10-21 Thread Jose R. Santos

Make e2fsck uninit block group aware.

This patch has all the necessary pieces to open and fix filesystems created
with the uninit block group feature.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 e2fsck/e2fsck.h  |2 +
 e2fsck/journal.c |2 +
 e2fsck/pass2.c   |   77 --
 e2fsck/pass5.c   |   61 +++
 e2fsck/problem.c |   42 +
 e2fsck/problem.h |   26 ++
 e2fsck/super.c   |   40 
 e2fsck/unix.c|   11 ++--
 e2fsck/util.c|   61 +++
 9 files changed, 292 insertions(+), 30 deletions(-)

diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 9ccffd8..a67322d 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -468,6 +468,8 @@ extern void e2fsck_read_bitmaps(e2fsck_t ctx);
 extern void e2fsck_write_bitmaps(e2fsck_t ctx);
 extern void preenhalt(e2fsck_t ctx);
 extern char *string_copy(e2fsck_t ctx, const char *str, int len);
+extern errcode_t e2fsck_zero_blocks(ext2_filsys fs, blk_t blk, int num,
+   blk_t *ret_blk, int *ret_count);
 #ifdef RESOURCE_TRACK
 extern void print_resource_track(const char *desc,
 struct resource_track *track,
diff --git a/e2fsck/journal.c b/e2fsck/journal.c
index f5f4647..ceade93 100644
--- a/e2fsck/journal.c
+++ b/e2fsck/journal.c
@@ -988,6 +988,8 @@ void e2fsck_move_ext3_journal(e2fsck_t ctx)
ext2fs_unmark_inode_bitmap(fs->inode_map, ino);
ext2fs_mark_ib_dirty(fs);
fs->group_desc[group].bg_free_inodes_count++;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
fs->super->s_free_inodes_count++;
return;
 
diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index 27f7136..047b5ca 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -151,7 +151,7 @@ void e2fsck_pass2(e2fsck_t ctx)

cd.pctx.errcode = ext2fs_dblist_iterate(fs->dblist, check_dir_block,
&cd);
-   if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
+   if (ctx->flags & E2F_FLAG_SIGNAL_MASK || ctx->flags & E2F_FLAG_RESTART)
return;
if (cd.pctx.errcode) {
fix_problem(ctx, PR_2_DBLIST_ITERATE, &cd.pctx);
@@ -736,7 +736,7 @@ static int check_dir_block(ext2_filsys fs,
buf = cd->buf;
ctx = cd->ctx;
 
-   if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
+   if (ctx->flags & E2F_FLAG_SIGNAL_MASK || ctx->flags & E2F_FLAG_RESTART)
return DIRENT_ABORT;

if (ctx->progress && (ctx->progress)(ctx, 2, cd->count++, cd->max))
@@ -833,6 +833,9 @@ static int check_dir_block(ext2_filsys fs,
dict_init(&de_dict, DICTCOUNT_T_MAX, dict_de_cmp);
prev = 0;
do {
+   int group;
+   ext2_ino_t first_unused_inode;
+
problem = 0;
dirent = (struct ext2_dir_entry *) (buf + offset);
cd->pctx.dirent = dirent;
@@ -882,12 +885,6 @@ static int check_dir_block(ext2_filsys fs,
 (dirent->inode < EXT2_FIRST_INODE(fs->super))) ||
(dirent->inode > fs->super->s_inodes_count)) {
problem = PR_2_BAD_INO;
-   } else if (!(ext2fs_test_inode_bitmap(ctx->inode_used_map,
-  dirent->inode))) {
-   /*
-* If the inode is unused, offer to clear it.
-*/
-   problem = PR_2_UNUSED_INODE;
} else if (ctx->inode_bb_map &&
   (ext2fs_test_inode_bitmap(ctx->inode_bb_map,
 dirent->inode))) {
@@ -964,6 +961,67 @@ static int check_dir_block(ext2_filsys fs,
return DIRENT_ABORT;
}
 
+   group = ext2fs_group_of_ino(fs, dirent->inode);
+   first_unused_inode = group * fs->super->s_inodes_per_group +
+   1 + fs->super->s_inodes_per_group -
+   fs->group_desc[group].bg_itable_unused;
+   cd->pctx.group = group;
+
+   /*
+* Check if the inode was missed out because _INODE_UNINIT
+* flag was set or bg_itable_unused was incorrect.
+* If that is the case restart e2fsck.
+* XXX Optimisations TODO:
+* 1. only restart e2fsck once
+* 2. only exposed inode

[PATCH 09/14] Make debugfs uninit block group aware.

2007-10-21 Thread Jose R. Santos

Make debugfs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 debugfs/debugfs.c |   18 +++---
 1 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/debugfs/debugfs.c b/debugfs/debugfs.c
index 190c4b7..c802b63 100644
--- a/debugfs/debugfs.c
+++ b/debugfs/debugfs.c
@@ -286,7 +286,10 @@ void do_show_super_stats(int argc, char *argv[])
FILE*out;
struct ext2_group_desc *gdp;
int c, header_only = 0;
-   int numdirs = 0, first;
+   int numdirs = 0, first, gdt_csum;
+
+   gdt_csum = EXT2_HAS_RO_COMPAT_FEATURE(current_fs->super,
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM);
 
reset_getopt();
while ((c = getopt (argc, argv, "h")) != EOF) {
@@ -322,7 +325,7 @@ void do_show_super_stats(int argc, char *argv[])
"inode table at %u\n"
"   %d free %s, "
"%d free %s, "
-   "%d used %s\n",
+   "%d used %s%s",
i, gdp->bg_block_bitmap,
gdp->bg_inode_bitmap, gdp->bg_inode_table,
gdp->bg_free_blocks_count,
@@ -331,12 +334,21 @@ void do_show_super_stats(int argc, char *argv[])
gdp->bg_free_inodes_count != 1 ? "inodes" : "inode",
gdp->bg_used_dirs_count,
gdp->bg_used_dirs_count != 1 ? "directories"
-   : "directory");
+   : "directory", gdt_csum ? ", " : "\n");
+   if (gdt_csum)
+   fprintf(out, "%d unused %s\n",
+   gdp->bg_itable_unused,
+   gdp->bg_itable_unused != 1 ? "inodes":"inode");
first = 1;
print_bg_opts(gdp, EXT2_BG_INODE_UNINIT, "Inode not init",
  &first, out);
print_bg_opts(gdp, EXT2_BG_BLOCK_UNINIT, "Block not init",
  &first, out);
+   if (gdt_csum) {
+   fprintf(out, "%sChecksum 0x%04x",
+   first ? "   [":", ", gdp->bg_checksum);
+   first = 0;
+   }
if (!first)
fputs("]\n", out);
}
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/14] Make resize2fs uninit block group aware.

2007-10-21 Thread Jose R. Santos

Make resize2fs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 resize/main.c  |7 +++
 resize/resize2fs.c |   29 -
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/resize/main.c b/resize/main.c
index 7db4ebc..7c1d0c1 100644
--- a/resize/main.c
+++ b/resize/main.c
@@ -298,6 +298,13 @@ int main (int argc, char ** argv)
printf (_("Couldn't find valid filesystem superblock.\n"));
exit (1);
}
+
+   if (fs->super->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_GDT_CSUM) {
+   com_err(program_name, EXT2_ET_RO_UNSUPP_FEATURE,
+   ":- uninit_groups");
+   exit(1);
+   }
+
/*
 * Check for compatibility with the feature sets.  We need to
 * be more stringent than ext2fs_open().
diff --git a/resize/resize2fs.c b/resize/resize2fs.c
index 0d6a082..ce0111c 100644
--- a/resize/resize2fs.c
+++ b/resize/resize2fs.c
@@ -339,7 +339,9 @@ retry:
numblocks = fs->super->s_blocks_per_group;
i = old_fs->group_desc_count - 1;
fs->group_desc[i].bg_free_blocks_count += (numblocks-old_numblocks);
-   
+   fs->group_desc[i].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, i, &fs->group_desc[i]);
+
/*
 * If the number of block groups is staying the same, we're
 * done and can exit now.  (If the number block groups is
@@ -415,6 +417,8 @@ retry:
fs->group_desc[i].bg_free_inodes_count =
fs->super->s_inodes_per_group;
fs->group_desc[i].bg_used_dirs_count = 0;
+   fs->group_desc[i].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, i,&fs->group_desc[i]);
 
retval = ext2fs_allocate_group_table(fs, i, 0);
if (retval) goto errout;
@@ -1223,9 +1227,13 @@ static errcode_t inode_scan_and_fix(ext2_resize_t rfs)
if (retval) goto errout;
 
group = (new_inode-1) / 
EXT2_INODES_PER_GROUP(rfs->new_fs->super);
-   if (LINUX_S_ISDIR(inode.i_mode))
+   if (LINUX_S_ISDIR(inode.i_mode)) {
rfs->new_fs->group_desc[group].bg_used_dirs_count++;
-   
+   rfs->new_fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(rfs->new_fs->super,group,
+  &rfs->new_fs->group_desc[group]);
+   }
+
 #ifdef RESIZE2FS_DEBUG
if (rfs->flags & RESIZE_DEBUG_INODEMAP)
printf("Inode moved %u->%u\n", ino, new_inode);
@@ -1478,6 +1486,9 @@ static errcode_t move_itables(ext2_resize_t rfs)
ext2fs_unmark_block_bitmap(fs->block_map, blk);
 
rfs->old_fs->group_desc[i].bg_inode_table = new_blk;
+   rfs->old_fs->group_desc[i].bg_checksum =
+   ext2fs_group_desc_csum(rfs->old_fs->super, i,
+  &rfs->old_fs->group_desc[i]);
ext2fs_mark_super_dirty(rfs->old_fs);
ext2fs_flush(rfs->old_fs);
 
@@ -1575,8 +1586,12 @@ static errcode_t 
ext2fs_calculate_summary_stats(ext2_filsys fs)
count++;
if ((count == fs->super->s_blocks_per_group) ||
(blk == fs->super->s_blocks_count-1)) {
-   fs->group_desc[group++].bg_free_blocks_count =
+   fs->group_desc[group].bg_free_blocks_count =
group_free;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,
+  &fs->group_desc[group]);
+   group++;
count = 0;
group_free = 0;
}
@@ -1600,8 +1615,12 @@ static errcode_t 
ext2fs_calculate_summary_stats(ext2_filsys fs)
count++;
if ((count == fs->super->s_inodes_per_group) ||
(ino == fs->super->s_inodes_count)) {
-   fs->group_desc[group++].bg_free_inodes_count =
+   fs->group_desc[group].bg_free_inodes_count =
group_free;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,
+  &fs->group_desc[group]);
+   group++;

[PATCH 07/14] Make dumpe2fs uninit block group aware.

2007-10-21 Thread Jose R. Santos

Make dumpe2fs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 misc/dumpe2fs.c |   13 +
 1 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/misc/dumpe2fs.c b/misc/dumpe2fs.c
index d4d95bb..c08528a 100644
--- a/misc/dumpe2fs.c
+++ b/misc/dumpe2fs.c
@@ -112,7 +112,8 @@ static void print_bg_opts(ext2_filsys fs, dgrp_t i)
 {
int first = 1, bg_flags;
 
-   if (fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_LAZY_BG)
+   if (fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_LAZY_BG ||
+   fs->super->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_GDT_CSUM)
bg_flags = fs->group_desc[i].bg_flags;
else
bg_flags = 0;
@@ -210,11 +211,15 @@ static void list_desc (ext2_filsys fs)
diff = fs->group_desc[i].bg_inode_table - first_block;
if (diff > 0)
printf(" (+%ld)", diff);
-   printf (_("\n  %d free blocks, %d free inodes, "
- "%d directories\n"),
+   printf (_("\n  %u free blocks, %u free inodes, "
+ "%u directories%s"),
fs->group_desc[i].bg_free_blocks_count,
fs->group_desc[i].bg_free_inodes_count,
-   fs->group_desc[i].bg_used_dirs_count);
+   fs->group_desc[i].bg_used_dirs_count,
+   fs->group_desc[i].bg_itable_unused ? "" : "\n");
+   if (fs->group_desc[i].bg_itable_unused)
+   printf (_(", %u unused inodes\n"),
+   fs->group_desc[i].bg_itable_unused);
if (block_bitmap) {
fputs(_("  Free blocks: "), stdout);
ext2fs_get_block_bitmap_range(fs->block_map, 
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/14] Make tune2fs uninit block group aware.

2007-10-21 Thread Jose R. Santos

Make tune2fs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 misc/tune2fs.c |   12 ++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 833b994..e2ebc08 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -98,7 +98,8 @@ static __u32 ok_features[3] = {
EXT3_FEATURE_COMPAT_HAS_JOURNAL |
EXT2_FEATURE_COMPAT_DIR_INDEX,  /* Compat */
EXT2_FEATURE_INCOMPAT_FILETYPE, /* Incompat */
-   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER /* R/O compat */
+   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER |   /* R/O compat */
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM
 };
 
 /*
@@ -213,6 +214,8 @@ static int release_blocks_proc(ext2_filsys fs, blk_t 
*blocknr,
ext2fs_unmark_block_bitmap(fs->block_map,block);
group = ext2fs_group_of_blk(fs, block);
fs->group_desc[group].bg_free_blocks_count++;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
fs->super->s_free_blocks_count++;
return 0;
 }
@@ -282,7 +285,7 @@ static void update_mntopts(ext2_filsys fs, char *mntopts)
 static void update_feature_set(ext2_filsys fs, char *features)
 {
int sparse, old_sparse, filetype, old_filetype;
-   int journal, old_journal, dxdir, old_dxdir;
+   int journal, old_journal, dxdir, old_dxdir, uninit, old_uninit;
struct ext2_super_block *sb= fs->super;
__u32   old_compat, old_incompat, old_ro_compat;
 
@@ -298,6 +301,8 @@ static void update_feature_set(ext2_filsys fs, char 
*features)
EXT3_FEATURE_COMPAT_HAS_JOURNAL;
old_dxdir = sb->s_feature_compat &
EXT2_FEATURE_COMPAT_DIR_INDEX;
+   old_uninit = sb->s_feature_ro_compat &
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM;
if (e2p_edit_feature(features, &sb->s_feature_compat,
 ok_features)) {
fprintf(stderr, _("Invalid filesystem option set: %s\n"),
@@ -312,6 +317,8 @@ static void update_feature_set(ext2_filsys fs, char 
*features)
EXT3_FEATURE_COMPAT_HAS_JOURNAL;
dxdir = sb->s_feature_compat &
EXT2_FEATURE_COMPAT_DIR_INDEX;
+   old_uninit = sb->s_feature_ro_compat &
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM;
if (old_journal && !journal) {
if ((mount_flags & EXT2_MF_MOUNTED) &&
!(mount_flags & EXT2_MF_READONLY)) {
@@ -358,6 +365,7 @@ static void update_feature_set(ext2_filsys fs, char 
*features)
 sb->s_feature_incompat))
ext2fs_update_dynamic_rev(fs);
if ((sparse != old_sparse) ||
+   (uninit != old_uninit) ||
(filetype != old_filetype)) {
sb->s_state &= ~EXT2_VALID_FS;
printf("\n%s\n", _(please_fsck));
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/14] Add support for creating filesystems using uninit block group.

2007-10-21 Thread Jose R. Santos

Add support for creating filesystems using uninit block group.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 misc/mke2fs.c |   44 
 1 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 4a6cace..8360c51 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -432,6 +432,8 @@ static void write_inode_tables(ext2_filsys fs)
num, blk, error_message(retval));
exit(1);
}
+   /* The kernel doesn't need to zero the itable blocks */
+   fs->group_desc[i].bg_flags |= EXT2_BG_INODE_ZEROED;
}
if (sync_kludge) {
if (sync_kludge == 1)
@@ -447,34 +449,49 @@ static void write_inode_tables(ext2_filsys fs)
 static void setup_lazy_bg(ext2_filsys fs)
 {
dgrp_t i;
-   int blks;
+   int blks, csum_flag;
struct ext2_super_block *sb = fs->super;
struct ext2_group_desc *bg = fs->group_desc;
 
-   if (EXT2_HAS_COMPAT_FEATURE(fs->super, 
-   EXT2_FEATURE_COMPAT_LAZY_BG)) {
+   csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+  EXT4_FEATURE_RO_COMPAT_GDT_CSUM);
+   if (EXT2_HAS_COMPAT_FEATURE(fs->super, EXT2_FEATURE_COMPAT_LAZY_BG) ||
+   csum_flag) {
for (i = 0; i < fs->group_desc_count; i++, bg++) {
if ((i == 0) ||
-   (i == fs->group_desc_count-1))
+   (i == fs->group_desc_count - 1 && !csum_flag))
continue;
if (bg->bg_free_inodes_count ==
sb->s_inodes_per_group) {
-   bg->bg_free_inodes_count = 0;
bg->bg_flags |= EXT2_BG_INODE_UNINIT;
-   sb->s_free_inodes_count -= 
-   sb->s_inodes_per_group;
+   if (!csum_flag) {
+   bg->bg_free_inodes_count = 0;
+   sb->s_free_inodes_count -=
+   sb->s_inodes_per_group;
+   }
}
+
+   /* Skip groups with GDT backups because the resize
+* inode has blocks allocated in them, and the last
+* group because it needs block bitmap padding. */
+   if ((ext2fs_bg_has_super(fs, i) &&
+sb->s_reserved_gdt_blocks) ||
+   i == fs->group_desc_count - 1)
+   continue;
+
blks = ext2fs_super_and_bgd_loc(fs, i, 0, 0, 0, 0);
-   if (bg->bg_free_blocks_count == blks) {
-   bg->bg_free_blocks_count = 0;
+   if (bg->bg_free_blocks_count == blks &&
+   bg->bg_flags & EXT2_BG_INODE_UNINIT) {
bg->bg_flags |= EXT2_BG_BLOCK_UNINIT;
-   sb->s_free_blocks_count -= blks;
+   if (!csum_flag) {
+   bg->bg_free_blocks_count = 0;
+   sb->s_free_blocks_count -= blks;
+   }
}
}
}
 }
 
-
 static void create_root_dir(ext2_filsys fs)
 {
errcode_t   retval;
@@ -874,7 +891,8 @@ static __u32 ok_features[3] = {
EXT2_FEATURE_INCOMPAT_FILETYPE| /* Incompat */
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|
EXT2_FEATURE_INCOMPAT_META_BG,
-   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER /* R/O compat */
+   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|/* R/O compat */
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM
 };
 
 
@@ -1750,6 +1768,8 @@ int main (int argc, char *argv[])
}
 no_journal:
 
+   if (!super_only)
+   ext2fs_set_gdt_csum(fs);
if (!quiet)
printf(_("Writing superblocks and "
   "filesystem accounting information: "));
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/14] Rename feature name from gdt_checksum to uninit_groups.

2007-10-21 Thread Jose R. Santos

Rename feature name from gdt_checksum to uninit_groups.

This name is a more intuitive option when running mke2fs.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 lib/e2p/feature.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index fe7e65a..7c25736 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -45,7 +45,7 @@ static struct feature feature_list[] = {
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_HUGE_FILE,
"huge_file" },
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_GDT_CSUM,
-   "gdt_checksum" },
+   "uninit_groups" },
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_DIR_NLINK,
"dir_nlink" },
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE,
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/14] Add uninit block group support on libe2fs.

2007-10-21 Thread Jose R. Santos

Add uninit block group support on libe2fs.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 lib/ext2fs/alloc_stats.c  |   25 +
 lib/ext2fs/alloc_tables.c |5 ++---
 lib/ext2fs/ext2fs.h   |4 +++-
 lib/ext2fs/initialize.c   |2 ++
 lib/ext2fs/inode.c|   29 +++--
 lib/ext2fs/openfs.c   |   16 
 lib/ext2fs/rw_bitmaps.c   |   14 ++
 7 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/lib/ext2fs/alloc_stats.c b/lib/ext2fs/alloc_stats.c
index 4088f7b..ee4a1e4 100644
--- a/lib/ext2fs/alloc_stats.c
+++ b/lib/ext2fs/alloc_stats.c
@@ -27,6 +27,27 @@ void ext2fs_inode_alloc_stats2(ext2_filsys fs, ext2_ino_t 
ino,
fs->group_desc[group].bg_free_inodes_count -= inuse;
if (isdir)
fs->group_desc[group].bg_used_dirs_count += inuse;
+
+   /* We don't strictly need to be clearing these if inuse < 0
+* (i.e. freeing inodes) but it also means something is bad. */
+   fs->group_desc[group].bg_flags &= ~(EXT2_BG_INODE_UNINIT |
+   EXT2_BG_BLOCK_UNINIT);
+   if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+  EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+   ext2_ino_t first_unused_inode = fs->super->s_inodes_per_group -
+   fs->group_desc[group].bg_itable_unused +
+   group * fs->super->s_inodes_per_group + 1;
+
+   if (ino >= first_unused_inode)
+   fs->group_desc[group].bg_itable_unused =
+   group * fs->super->s_inodes_per_group +
+   fs->super->s_inodes_per_group - ino;
+
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,
+  &fs->group_desc[group]);
+   }
+
fs->super->s_free_inodes_count -= inuse;
ext2fs_mark_super_dirty(fs);
ext2fs_mark_ib_dirty(fs);
@@ -46,6 +67,10 @@ void ext2fs_block_alloc_stats(ext2_filsys fs, blk_t blk, int 
inuse)
else
ext2fs_unmark_block_bitmap(fs->block_map, blk);
fs->group_desc[group].bg_free_blocks_count -= inuse;
+   fs->group_desc[group].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
+
fs->super->s_free_blocks_count -= inuse;
ext2fs_mark_super_dirty(fs);
ext2fs_mark_bb_dirty(fs);
diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..290e54b 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -95,13 +95,12 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
ext2fs_mark_block_bitmap(bmap, blk);
fs->group_desc[group].bg_inode_table = new_blk;
}
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
 
-   
return 0;
 }
 
-   
-
 errcode_t ext2fs_allocate_tables(ext2_filsys fs)
 {
errcode_t   retval;
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 58c8606..2f9ac7f 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -322,6 +322,7 @@ typedef struct ext2_struct_inode_scan *ext2_inode_scan;
 #define EXT2_SF_BAD_EXTRA_BYTES0x0004
 #define EXT2_SF_SKIP_MISSING_ITABLE0x0008
 #define EXT2_SF_DO_LAZY0x0010
+#define EXT2_SF_DO_CSUM0x0020
 
 /*
  * ext2fs_check_if_mounted flags
@@ -441,7 +442,8 @@ typedef struct ext2_icount *ext2_icount_t;
 EXT3_FEATURE_INCOMPAT_RECOVER)
 #endif
 #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP
(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
-EXT2_FEATURE_RO_COMPAT_LARGE_FILE)
+EXT2_FEATURE_RO_COMPAT_LARGE_FILE|\
+EXT4_FEATURE_RO_COMPAT_GDT_CSUM)
 
 /*
  * These features are only allowed if EXT2_FLAG_SOFTSUPP_FEATURES is passed
diff --git a/lib/ext2fs/initialize.c b/lib/ext2fs/initialize.c
index 16e9eaa..5710f04 100644
--- a/lib/ext2fs/initialize.c
+++ b/lib/ext2fs/initialize.c
@@ -374,6 +374,8 @@ ipg_retry:
fs->group_desc[i].bg_free_inodes_count =
fs->super->s_inodes_per_group;
fs->group_desc[i].bg_used_dirs_count = 0;
+   fs->group_desc[i].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, i,&fs->group_desc[i]);
}

c = (char) 255;
diff --git a/lib/ext2fs/

[PATCH 02/14] Add initial checksum support.

2007-10-21 Thread Jose R. Santos

Add initial checksum support.

- Add support for computing CRC-16 value.
- Add call to check/verify/set csum on block_groups.
- Add a test program to verify csum operations.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-Off-By: Andreas Dilger <[EMAIL PROTECTED]>
--

 lib/ext2fs/Makefile.in |   19 ++
 lib/ext2fs/crc16.c |   59 +++
 lib/ext2fs/crc16.h |   46 +++
 lib/ext2fs/csum.c  |  149 
 lib/ext2fs/ext2_fs.h   |1 
 lib/ext2fs/ext2fs.h|7 ++
 lib/ext2fs/tst_csum.c  |  113 
 7 files changed, 393 insertions(+), 1 deletions(-)

diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index ca65427..dff2774 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -28,6 +28,8 @@ OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) 
$(E2IMAGE_LIB_OBJS) \
bmap.o \
check_desc.o \
closefs.o \
+   crc16.o \
+   csum.o \
dblist.o \
dblist_dir.o \
dirblock.o \
@@ -82,6 +84,8 @@ SRCS= ext2_err.c \
$(srcdir)/bmap.c \
$(srcdir)/check_desc.c \
$(srcdir)/closefs.c \
+   $(srcdir)/crc16.c \
+   $(srcdir)/csum.c \
$(srcdir)/dblist.c \
$(srcdir)/dblist_dir.c \
$(srcdir)/dirblock.c \
@@ -126,6 +130,7 @@ SRCS= ext2_err.c \
$(srcdir)/tst_badblocks.c \
$(srcdir)/tst_bitops.c \
$(srcdir)/tst_byteswap.c \
+   $(srcdir)/tst_csum.c \
$(srcdir)/tst_getsize.c \
$(srcdir)/tst_iscan.c \
$(srcdir)/unix_io.c \
@@ -239,17 +244,23 @@ ext2_tdbtool: tdbtool.o
@echo " LD $@"
@$(CC) -o ext2_tdbtool tdbtool.o tdb.o
 
+tst_csum: tst_csum.o csum.o crc16.o $(STATIC_LIBEXT2FS)
+   @echo " LD $@"
+   @$(CC) -o tst_csum csum.o tst_csum.o crc16.o $(STATIC_LIBEXT2FS) \
+   $(LIBCOM_ERR)
+
 mkjournal: mkjournal.c $(STATIC_LIBEXT2FS)
@echo " LD $@"
@$(CC) -o mkjournal $(srcdir)/mkjournal.c -DDEBUG $(STATIC_LIBEXT2FS) 
$(LIBCOM_ERR) $(ALL_CFLAGS)
 
-check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount tst_super_size
+check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount tst_super_size 
tst_types tst_csum
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_bitops
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_badblocks
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_iscan
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_types
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_icount
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_super_size
+   LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_csum
 
 installdirs::
@echo " MKINSTALLDIRS $(libdir) $(includedir)/ext2fs"
@@ -357,6 +368,10 @@ closefs.o: $(srcdir)/closefs.c $(srcdir)/ext2_fs.h \
  $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h $(srcdir)/ext3_extents.h \
  $(top_srcdir)/lib/et/com_err.h $(srcdir)/ext2_io.h \
  $(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
+crc16.o: $(srcdir)/crc16.c $(srcdir)/ext2_fs.h $(srcdir)/crc16.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h
+csum.o: $(srcdir)/csum.c $(srcdir)/ext2_fs.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h
 dblist.o: $(srcdir)/dblist.c $(srcdir)/ext2_fs.h \
  $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fsP.h \
  $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h $(srcdir)/ext3_extents.h \
@@ -571,3 +586,5 @@ tst_iscan.o: $(srcdir)/tst_iscan.c $(srcdir)/ext2_fs.h \
  $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
  $(srcdir)/ext2_fs.h $(srcdir)/ext3_extents.h $(top_srcdir)/lib/et/com_err.h \
  $(srcdir)/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
+tst_csum.o: $(srcdir)/tst_csum.c $(srcdir)/ext2_fs.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h
diff --git a/lib/ext2fs/crc16.c b/lib/ext2fs/crc16.c
new file mode 100644
index 000..d480dba
--- /dev/null
+++ b/lib/ext2fs/crc16.c
@@ -0,0 +1,59 @@
+/*
+ *  crc16.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include "crc16.h"
+
+/** CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1) */
+__u16 const crc16_table[256] = {
+   0x, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
+   0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
+   0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
+   0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
+   0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
+   0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
+   0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
+   

[PATCH 01/14] Reorder some of the $(SRCS) in alphabetical order.

2007-10-21 Thread Jose R. Santos

Reorder some of the $(SRCS) in alphabetical order.

All files under $(OBJS) and $(SRCS) should be in alphabetical order
but this is not always the case.  Let's fix some of these before
adding new files to the list of $(SRCS).

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/Makefile.in |   12 ++--
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index 03ce131..ca65427 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -123,16 +123,16 @@ SRCS= ext2_err.c \
$(srcdir)/swapfs.c \
$(srcdir)/tdb.c \
$(srcdir)/test_io.c \
-   $(srcdir)/unix_io.c \
-   $(srcdir)/unlink.c \
-   $(srcdir)/valid_blk.c \
-   $(srcdir)/version.c \
-   $(srcdir)/write_bb_file.c \
$(srcdir)/tst_badblocks.c \
$(srcdir)/tst_bitops.c \
$(srcdir)/tst_byteswap.c \
$(srcdir)/tst_getsize.c \
-   $(srcdir)/tst_iscan.c
+   $(srcdir)/tst_iscan.c \
+   $(srcdir)/unix_io.c \
+   $(srcdir)/unlink.c \
+   $(srcdir)/valid_blk.c \
+   $(srcdir)/version.c \
+   $(srcdir)/write_bb_file.c
 
 HFILES= bitops.h ext2fs.h ext2_io.h ext2_fs.h ext2_ext_attr.h ext3_extents.h \
tdb.h
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2 00/14][e2fsprogs] Uninit block group break down

2007-10-21 Thread Jose R. Santos

The Uninit block group patch is painful to review since it's quite a
large patch.  Since simple patches are easier to review, I decided to
break down the patch into smaller logical pieces that should be easier
to review, clean and fix.
 
Aside from the breakdown, this is essentially the same patch that
Avantika submitted to the mailing list a couple of weeks ago, with some
minor changes to allow it to apply on the latest git tree.

Tested on PPC64 with 2.6.23-rc9 + ext4 patch queue.

Changes from the last series:

- Changed file ordering under $(OBJS) and $(SRCS)
- Fix typos to make it work under PPC64
- Added Andreas fixes for PowerPC.
http://lists.openwall.net/linux-ext4/2007/06/05/31
- Use ext2fs/ext2_types.h instead of linux/types.h

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: What's cooking in e2fsprogs.git (topics)

2007-10-16 Thread Jose R. Santos
On Tue, 16 Oct 2007 00:48:21 -0400
"Theodore Ts'o" <[EMAIL PROTECTED]> wrote:

> * js/flex-bg (Mon Aug 13 23:33:14 2007 -0500) 7 commits
>  - New bitmap and inode table allocation for FLEX_BG

This one was more of an RFC to let folks play with different metadata
layouts.  It should probably stay outside the tree for now.  The other
three patches are what's needed to get e2fsprogs in sync with the kernel.

>  - Relax group descriptor checking.

This one needs updating to be more in line with what we define FLEX_BG
to be in the kernel.  I think I also encountered conflicts when pulling
from the git tree a while back, so the one in the mailing list will not
apply cleanly.
 
>  - Allow FLEX_BG to be use as a feature option at mke2fs time.
>  - Reserve the INCOMPAT feature number for FLEX_BG.
> 
> Relatively straight forward, so we should be able to merge this soon.

The last two are fine, but it's still missing documentation.  Skip this
series for now.  I will update and resend.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: question about patch queue and ext4-git

2007-10-15 Thread Jose R. Santos
On Mon, 15 Oct 2007 20:46:32 +0800
Coly Li <[EMAIL PROTECTED]> wrote:

> Thanks for the replying :-)
> 
> Jose R. Santos wrote:
> > On Mon, 15 Oct 2007 13:36:05 +0800
> > Coly Li <[EMAIL PROTECTED]> wrote:
> > 
> >> Now in my mind there are several words for ext4 patches, most frequently 
> >> one are "patch queue".
> >>
> >> I see the patches in patch queue from
> >> http://www2.kernel.org/pub/linux/kernel/people/tytso/ext4-patches/LATEST/broken-out/
> >>  .
> >> Also I confirm some of the patches are in ext4 git tree now, but I am not 
> >> sure for two questions:
> >> 1) Whether all the patches are in ext4 git tree ?
> > 
> > Yes, all these patches should be in ext4 git tree.
> > 
> >> 2) This patch queue is only used to push ext4 patch into upstream ?
> > 
> > The patch queue series is divided into stable and unstable patches.
> > The stable patches are usually the ones that get pushed back
> > upstream, while the unstable section has the patches that are for
> > development purposes only and are not ready for pushing upstream (and
> > some may never make it in).
> 
> How to recognize which patch is stable patch and which one is unstable patch ?

Look in the series file.  The mark where the stable patches end is
documented there.
  
> > 
> >> Also there is a patch-queue git at 
> >> http://repo.or.cz/w/ext4-patch-queue.git , is it same to the
> >> patches in 
> >> http://www2.kernel.org/pub/linux/kernel/people/tytso/ext4-patches/ ?
> > 
> > Same thing, but in git format.  I believe Ted updates his patch queue
> > from the patches in the git tree repo, so if you want latest/greatest,
> > the git tree is what you want.
> > 
> >> Thanks for clarifying :-)
> >>
> >>
> > 
> > -JRS
> 

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: question about patch queue and ext4-git

2007-10-15 Thread Jose R. Santos
On Mon, 15 Oct 2007 13:36:05 +0800
Coly Li <[EMAIL PROTECTED]> wrote:

> Now in my mind there are several words for ext4 patches, most frequently one 
> are "patch queue".
> 
> I see the patches in patch queue from
> http://www2.kernel.org/pub/linux/kernel/people/tytso/ext4-patches/LATEST/broken-out/
>  .
> Also I confirm some of the patches are in ext4 git tree now, but I am not 
> sure for two questions:
> 1) Whether all the patches are in ext4 git tree ?

Yes, all these patches should be in ext4 git tree.

> 2) This patch queue is only used to push ext4 patch into upstream ?

The patch queue series is divided into stable and unstable patches.
The stable patches are usually the ones that get pushed back
upstream, while the unstable section has the patches that are for
development purposes only and are not ready for pushing upstream (and
some may never make it in).

> Also there is a patch-queue git at http://repo.or.cz/w/ext4-patch-queue.git , 
> is it same to the
> patches in http://www2.kernel.org/pub/linux/kernel/people/tytso/ext4-patches/ 
> ?

Same thing, but in git format.  I believe Ted updates his patch queue
from the patches in the git tree repo, so if you want latest/greatest,
the git tree is what you want.

> Thanks for clarifying :-)
> 
> 

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/13][e2fsprogs] Add initial checksum support.

2007-10-15 Thread Jose R. Santos
On Sun, 14 Oct 2007 22:46:05 -0400
Theodore Tso <[EMAIL PROTECTED]> wrote:

> In crc16.h, this patch assumes that linux/types.h defines uint16_t.  
> 
> There are a couple of problems with this.  
> 
> #1) linux/types.h is non-portable, not only does it not exist on
>  non-Linux systems, apparently on Ubuntu it's not always defining
>  uint16_t.  On my Ubuntu gutsy system, it doesn't always get defined.
> 
> CC ../../../lib/ext2fs/crc16.c
> In file included from ../../../lib/ext2fs/crc16.c:10:
> ../../../lib/ext2fs/crc16.h:20: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or 
> ‘__attribute__’ before ‘const’
> ../../../lib/ext2fs/crc16.h:22: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or 
> ‘__attribute__’ before ‘crc16’
> 
> 
> The right thing to do is to use ext2fs/ext2_types.h like everything
> else in e2fsprogs, and use __u16 instead of uint16_t.
> 
>   - Ted

Yes, I noticed the use of linux/types.h as well.  I already have this
fixed in my patch queue, and I'm in the process of cleaning up some of
the other patches as well.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/13][e2fsprogs] Add new m_lazy test case.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Add new m_lazy test case.

Add test case for lazy bg feature.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 tests/m_lazy/expect.1 |  166 +
 tests/m_lazy/script   |4 +
 2 files changed, 170 insertions(+), 0 deletions(-)

diff --git a/tests/m_lazy/expect.1 b/tests/m_lazy/expect.1
new file mode 100644
index 000..32ca764
--- /dev/null
+++ b/tests/m_lazy/expect.1
@@ -0,0 +1,166 @@
+Filesystem label=
+OS type: Linux
+Block size=1024 (log=0)
+Fragment size=1024 (log=0)
+32768 inodes, 131072 blocks
+6553 blocks (5.00%) reserved for the super user
+First data block=1
+Maximum filesystem blocks=67371008
+16 block groups
+8192 blocks per group, 8192 fragments per group
+2048 inodes per group
+Superblock backups stored on blocks: 
+   8193, 24577, 40961, 57345, 73729
+
+Writing inode tables: done
+Writing superblocks and filesystem accounting information: done
+
+Filesystem features: resize_inode dir_index lazy_bg filetype sparse_super
+ 
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 28683/32768 files (0.0% non-contiguous), 77097/131072 blocks
+Exit status is 0
+
+Filesystem volume name:   
+Last mounted on:  
+Filesystem magic number:  0xEF53
+Filesystem revision #:1 (dynamic)
+Filesystem features:  resize_inode dir_index lazy_bg filetype sparse_super
+Default mount options:(none)
+Filesystem state: clean
+Errors behavior:  Continue
+Filesystem OS type:   Linux
+Inode count:  32768
+Block count:  131072
+Reserved block count: 6553
+Free blocks:  53975
+Free inodes:  4085
+First block:  1
+Block size:   1024
+Fragment size:1024
+Reserved GDT blocks:  256
+Blocks per group: 8192
+Fragments per group:  8192
+Inodes per group: 2048
+Inode blocks per group:   256
+Mount count:  0
+Check interval:   15552000 (6 months)
+Reserved blocks uid:  0
+Reserved blocks gid:  0
+First inode:  11
+Inode size:  128
+Default directory hash:   tea
+
+
+Group 0: (Blocks 1-8192)
+  Primary superblock at 1, Group descriptors at 2-2
+  Reserved GDT blocks at 3-258
+  Block bitmap at 259 (+258), Inode bitmap at 260 (+259)
+  Inode table at 261-516 (+260)
+  7662 free blocks, 2037 free inodes, 2 directories
+  Free blocks: 531-8192
+  Free inodes: 12-2048
+Group 1: (Blocks 8193-16384) [Inode not init]
+  Backup superblock at 8193, Group descriptors at 8194-8194
+  Reserved GDT blocks at 8195-8450
+  Block bitmap at 8451 (+258), Inode bitmap at 8452 (+259)
+  Inode table at 8453-8708 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 8709-16384
+  Free inodes: 
+Group 2: (Blocks 16385-24576) [Inode not init, Block not init]
+  Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
+  Inode table at 16387-16642 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+  Free inodes: 
+Group 3: (Blocks 24577-32768) [Inode not init]
+  Backup superblock at 24577, Group descriptors at 24578-24578
+  Reserved GDT blocks at 24579-24834
+  Block bitmap at 24835 (+258), Inode bitmap at 24836 (+259)
+  Inode table at 24837-25092 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 25093-32768
+  Free inodes: 
+Group 4: (Blocks 32769-40960) [Inode not init, Block not init]
+  Block bitmap at 32769 (+0), Inode bitmap at 32770 (+1)
+  Inode table at 32771-33026 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+  Free inodes: 
+Group 5: (Blocks 40961-49152) [Inode not init]
+  Backup superblock at 40961, Group descriptors at 40962-40962
+  Reserved GDT blocks at 40963-41218
+  Block bitmap at 41219 (+258), Inode bitmap at 41220 (+259)
+  Inode table at 41221-41476 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 41477-49152
+  Free inodes: 
+Group 6: (Blocks 49153-57344) [Inode not init, Block not init]
+  Block bitmap at 49153 (+0), Inode bitmap at 49154 (+1)
+  Inode table at 49155-49410 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+  Free inodes: 
+Group 7: (Blocks 57345-65536) [Inode not init]
+  Backup superblock at 57345, Group descriptors at 57346-57346
+  Reserved GDT blocks at 57347-57602
+  Block bitmap at 57603 (+258), Inode bitmap at 57604 (+259)
+  Inode table at 57605-57860 (+260)
+  7676 free blocks, 0 free inodes, 0 directories
+  Free blocks: 57861-65536
+  Free inodes: 
+Group 8: (Blocks 65537-73728) [Inode not init, Block not init]
+  Block bitmap at 65537 (+0), Inode bitmap at 65538 (+1)
+  Inode table at 65539-65794 (+2)
+  0 free blocks, 0 free inodes, 0 directories
+  Free blocks: 
+  Free inode

[PATCH 13/13][e2fsprogs] Add m_uninit test case.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Add m_uninit test case.

Add test case to test for uninit block groups.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 tests/m_uninit/expect.1 |  166 +++
 tests/m_uninit/script   |4 +
 2 files changed, 170 insertions(+), 0 deletions(-)

diff --git a/tests/m_uninit/expect.1 b/tests/m_uninit/expect.1
new file mode 100644
index 000..4167ff5
--- /dev/null
+++ b/tests/m_uninit/expect.1
@@ -0,0 +1,166 @@
+Filesystem label=
+OS type: Linux
+Block size=1024 (log=0)
+Fragment size=1024 (log=0)
+32768 inodes, 131072 blocks
+6553 blocks (5.00%) reserved for the super user
+First data block=1
+Maximum filesystem blocks=67371008
+16 block groups
+8192 blocks per group, 8192 fragments per group
+2048 inodes per group
+Superblock backups stored on blocks: 
+   8193, 24577, 40961, 57345, 73729
+
+Writing inode tables: done
+Writing superblocks and filesystem accounting information: done
+
+Filesystem features: resize_inode dir_index filetype sparse_super uninit_groups
+ 
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 11/32768 files (9.1% non-contiguous), 5691/131072 blocks
+Exit status is 0
+
+Filesystem volume name:   
+Last mounted on:  
+Filesystem magic number:  0xEF53
+Filesystem revision #:1 (dynamic)
+Filesystem features:  resize_inode dir_index filetype sparse_super 
uninit_groups
+Default mount options:(none)
+Filesystem state: clean
+Errors behavior:  Continue
+Filesystem OS type:   Linux
+Inode count:  32768
+Block count:  131072
+Reserved block count: 6553
+Free blocks:  125381
+Free inodes:  32757
+First block:  1
+Block size:   1024
+Fragment size:1024
+Reserved GDT blocks:  256
+Blocks per group: 8192
+Fragments per group:  8192
+Inodes per group: 2048
+Inode blocks per group:   256
+Mount count:  0
+Check interval:   15552000 (6 months)
+Reserved blocks uid:  0
+Reserved blocks gid:  0
+First inode:  11
+Inode size:  128
+Default directory hash:   tea
+
+
+Group 0: (Blocks 1-8192)
+  Primary superblock at 1, Group descriptors at 2-2
+  Reserved GDT blocks at 3-258
+  Block bitmap at 259 (+258), Inode bitmap at 260 (+259)
+  Inode table at 261-516 (+260)
+  7662 free blocks, 2037 free inodes, 2 directories, 2037 unused inodes
+  Free blocks: 531-8192
+  Free inodes: 12-2048
+Group 1: (Blocks 8193-16384) [Inode not init]
+  Backup superblock at 8193, Group descriptors at 8194-8194
+  Reserved GDT blocks at 8195-8450
+  Block bitmap at 8451 (+258), Inode bitmap at 8452 (+259)
+  Inode table at 8453-8708 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 8709-16384
+  Free inodes: 
+Group 2: (Blocks 16385-24576) [Inode not init, Block not init]
+  Block bitmap at 16385 (+0), Inode bitmap at 16386 (+1)
+  Inode table at 16387-16642 (+2)
+  7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 
+  Free inodes: 
+Group 3: (Blocks 24577-32768) [Inode not init]
+  Backup superblock at 24577, Group descriptors at 24578-24578
+  Reserved GDT blocks at 24579-24834
+  Block bitmap at 24835 (+258), Inode bitmap at 24836 (+259)
+  Inode table at 24837-25092 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 25093-32768
+  Free inodes: 
+Group 4: (Blocks 32769-40960) [Inode not init, Block not init]
+  Block bitmap at 32769 (+0), Inode bitmap at 32770 (+1)
+  Inode table at 32771-33026 (+2)
+  7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 
+  Free inodes: 
+Group 5: (Blocks 40961-49152) [Inode not init]
+  Backup superblock at 40961, Group descriptors at 40962-40962
+  Reserved GDT blocks at 40963-41218
+  Block bitmap at 41219 (+258), Inode bitmap at 41220 (+259)
+  Inode table at 41221-41476 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 41477-49152
+  Free inodes: 
+Group 6: (Blocks 49153-57344) [Inode not init, Block not init]
+  Block bitmap at 49153 (+0), Inode bitmap at 49154 (+1)
+  Inode table at 49155-49410 (+2)
+  7934 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 
+  Free inodes: 
+Group 7: (Blocks 57345-65536) [Inode not init]
+  Backup superblock at 57345, Group descriptors at 57346-57346
+  Reserved GDT blocks at 57347-57602
+  Block bitmap at 57603 (+258), Inode bitmap at 57604 (+259)
+  Inode table at 57605-57860 (+260)
+  7676 free blocks, 2048 free inodes, 0 directories, 2048 unused inodes
+  Free blocks: 57861-65536
+  Free inodes: 
+Group 8: (

[PATCH 09/13][e2fsprogs] Make e2fsck uninit block group aware.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Make e2fsck uninit block group aware.

This patch has all the necessary pieces to open and fix filesystems created
with the uninit block group feature.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 e2fsck/e2fsck.h  |2 +
 e2fsck/journal.c |2 +
 e2fsck/pass2.c   |   77 --
 e2fsck/pass5.c   |   61 +++
 e2fsck/problem.c |   42 +
 e2fsck/problem.h |   26 ++
 e2fsck/super.c   |   40 
 e2fsck/unix.c|   11 ++--
 e2fsck/util.c|   61 +++
 9 files changed, 292 insertions(+), 30 deletions(-)

diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 9ccffd8..a67322d 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -468,6 +468,8 @@ extern void e2fsck_read_bitmaps(e2fsck_t ctx);
 extern void e2fsck_write_bitmaps(e2fsck_t ctx);
 extern void preenhalt(e2fsck_t ctx);
 extern char *string_copy(e2fsck_t ctx, const char *str, int len);
+extern errcode_t e2fsck_zero_blocks(ext2_filsys fs, blk_t blk, int num,
+   blk_t *ret_blk, int *ret_count);
 #ifdef RESOURCE_TRACK
 extern void print_resource_track(const char *desc,
 struct resource_track *track,
diff --git a/e2fsck/journal.c b/e2fsck/journal.c
index f5f4647..ceade93 100644
--- a/e2fsck/journal.c
+++ b/e2fsck/journal.c
@@ -988,6 +988,8 @@ void e2fsck_move_ext3_journal(e2fsck_t ctx)
ext2fs_unmark_inode_bitmap(fs->inode_map, ino);
ext2fs_mark_ib_dirty(fs);
fs->group_desc[group].bg_free_inodes_count++;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
fs->super->s_free_inodes_count++;
return;
 
diff --git a/e2fsck/pass2.c b/e2fsck/pass2.c
index 27f7136..047b5ca 100644
--- a/e2fsck/pass2.c
+++ b/e2fsck/pass2.c
@@ -151,7 +151,7 @@ void e2fsck_pass2(e2fsck_t ctx)

cd.pctx.errcode = ext2fs_dblist_iterate(fs->dblist, check_dir_block,
&cd);
-   if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
+   if (ctx->flags & E2F_FLAG_SIGNAL_MASK || ctx->flags & E2F_FLAG_RESTART)
return;
if (cd.pctx.errcode) {
fix_problem(ctx, PR_2_DBLIST_ITERATE, &cd.pctx);
@@ -736,7 +736,7 @@ static int check_dir_block(ext2_filsys fs,
buf = cd->buf;
ctx = cd->ctx;
 
-   if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
+   if (ctx->flags & E2F_FLAG_SIGNAL_MASK || ctx->flags & E2F_FLAG_RESTART)
return DIRENT_ABORT;

if (ctx->progress && (ctx->progress)(ctx, 2, cd->count++, cd->max))
@@ -833,6 +833,9 @@ static int check_dir_block(ext2_filsys fs,
dict_init(&de_dict, DICTCOUNT_T_MAX, dict_de_cmp);
prev = 0;
do {
+   int group;
+   ext2_ino_t first_unused_inode;
+
problem = 0;
dirent = (struct ext2_dir_entry *) (buf + offset);
cd->pctx.dirent = dirent;
@@ -882,12 +885,6 @@ static int check_dir_block(ext2_filsys fs,
 (dirent->inode < EXT2_FIRST_INODE(fs->super))) ||
(dirent->inode > fs->super->s_inodes_count)) {
problem = PR_2_BAD_INO;
-   } else if (!(ext2fs_test_inode_bitmap(ctx->inode_used_map,
-  dirent->inode))) {
-   /*
-* If the inode is unused, offer to clear it.
-*/
-   problem = PR_2_UNUSED_INODE;
} else if (ctx->inode_bb_map &&
   (ext2fs_test_inode_bitmap(ctx->inode_bb_map,
 dirent->inode))) {
@@ -964,6 +961,67 @@ static int check_dir_block(ext2_filsys fs,
return DIRENT_ABORT;
}
 
+   group = ext2fs_group_of_ino(fs, dirent->inode);
+   first_unused_inode = group * fs->super->s_inodes_per_group +
+   1 + fs->super->s_inodes_per_group -
+   fs->group_desc[group].bg_itable_unused;
+   cd->pctx.group = group;
+
+   /*
+* Check if the inode was missed out because _INODE_UNINIT
+* flag was set or bg_itable_unused was incorrect.
+* If that is the case restart e2fsck.
+* XXX Optimisations TODO:
+* 1. only restart e2fsck once
+* 2. only exposed inode

[PATCH 11/13][e2fsprogs] Fix test cases.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Fix test cases.

Some of the tools' output has changed, so this patch updates the expected
output of the f_dupfsblks and m_raid_opt test cases.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 tests/f_dupfsblks/expect.1 |3 ++-
 tests/m_raid_opt/expect.1  |   33 ++---
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/tests/f_dupfsblks/expect.1 b/tests/f_dupfsblks/expect.1
index 661e164..32ce89b 100644
--- a/tests/f_dupfsblks/expect.1
+++ b/tests/f_dupfsblks/expect.1
@@ -44,7 +44,8 @@ Salvage? yes
 Directory inode 12, block 3, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (12) has deleted/unused inode 32.  Clear? yes
+Entry '' in ??? (12) has a zero-length name.
+Clear? yes
 
 Directory inode 12, block 4, offset 100: directory corrupted
 Salvage? yes
diff --git a/tests/m_raid_opt/expect.1 b/tests/m_raid_opt/expect.1
index 44c5b46..f5abc37 100644
--- a/tests/m_raid_opt/expect.1
+++ b/tests/m_raid_opt/expect.1
@@ -46,57 +46,68 @@ Setting filetype for entry '..' in ??? (11) to 2.
 Directory inode 11, block 1, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1063.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 2, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1064.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 3, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1065.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 4, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1066.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 5, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1067.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 6, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1068.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 7, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1069.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 8, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1070.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 9, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1071.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 10, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1072.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Directory inode 11, block 11, offset 0: directory corrupted
 Salvage? yes
 
-Entry '' in ??? (11) has deleted/unused inode 1073.  Clear? yes
+Entry '' in ??? (11) has a zero-length name.
+Clear? yes
 
 Pass 3: Checking directory connectivity
 '..' in / (2) is  (0), should be / (2).
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/13][e2fsprogs] Make debugfs uninit block group aware.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Make debugfs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 debugfs/debugfs.c |   18 +++---
 1 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/debugfs/debugfs.c b/debugfs/debugfs.c
index 190c4b7..c802b63 100644
--- a/debugfs/debugfs.c
+++ b/debugfs/debugfs.c
@@ -286,7 +286,10 @@ void do_show_super_stats(int argc, char *argv[])
FILE*out;
struct ext2_group_desc *gdp;
int c, header_only = 0;
-   int numdirs = 0, first;
+   int numdirs = 0, first, gdt_csum;
+
+   gdt_csum = EXT2_HAS_RO_COMPAT_FEATURE(current_fs->super,
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM);
 
reset_getopt();
while ((c = getopt (argc, argv, "h")) != EOF) {
@@ -322,7 +325,7 @@ void do_show_super_stats(int argc, char *argv[])
"inode table at %u\n"
"   %d free %s, "
"%d free %s, "
-   "%d used %s\n",
+   "%d used %s%s",
i, gdp->bg_block_bitmap,
gdp->bg_inode_bitmap, gdp->bg_inode_table,
gdp->bg_free_blocks_count,
@@ -331,12 +334,21 @@ void do_show_super_stats(int argc, char *argv[])
gdp->bg_free_inodes_count != 1 ? "inodes" : "inode",
gdp->bg_used_dirs_count,
gdp->bg_used_dirs_count != 1 ? "directories"
-   : "directory");
+   : "directory", gdt_csum ? ", " : "\n");
+   if (gdt_csum)
+   fprintf(out, "%d unused %s\n",
+   gdp->bg_itable_unused,
+   gdp->bg_itable_unused != 1 ? "inodes":"inode");
first = 1;
print_bg_opts(gdp, EXT2_BG_INODE_UNINIT, "Inode not init",
  &first, out);
print_bg_opts(gdp, EXT2_BG_BLOCK_UNINIT, "Block not init",
  &first, out);
+   if (gdt_csum) {
+   fprintf(out, "%sChecksum 0x%04x",
+   first ? "   [":", ", gdp->bg_checksum);
+   first = 0;
+   }
if (!first)
fputs("]\n", out);
}
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/13][e2fsprogs] Update uninit block group documentation for some of the utilities.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Update uninit block group documentation for some of the utilities.

Updates the man pages for mke2fs(8) and tune2fs(8).

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 misc/mke2fs.8.in  |9 -
 misc/tune2fs.8.in |   12 ++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/misc/mke2fs.8.in b/misc/mke2fs.8.in
index a3dc4a1..171df5b 100644
--- a/misc/mke2fs.8.in
+++ b/misc/mke2fs.8.in
@@ -210,7 +210,7 @@ for the filesystem.  (For administrators who are creating
 filesystems on RAID arrays, it is preferable to use the
 .I stride
 RAID parameter as part of the
-.B \-R
+.B \-E
 option rather than manipulating the number of blocks per group.)  
 This option is generally used by developers who
 are developing test cases.  
@@ -406,6 +406,13 @@ Store file type information in directory entries.
 .TP
 .B has_journal
 Create an ext3 journal (as if using the
+.TP
+.B uninit_groups
+Create a filesystem without initializing all of the groups.  This speeds
+up filesystem creation time noticably, and can also reduce
+.BR e2fsck time
+dramatically.  This feature causes the filesystem to be read-only in
+older kernels is not supported in most Linux kernels, use with caution.
 .B \-j
 option).
 @[EMAIL PROTECTED]
diff --git a/misc/tune2fs.8.in b/misc/tune2fs.8.in
index 2e617db..5ab1bd0 100644
--- a/misc/tune2fs.8.in
+++ b/misc/tune2fs.8.in
@@ -392,10 +392,16 @@ option.
 .TP
 .B sparse_super
 Limit the number of backup superblocks to save space on large filesystems.
+.TP
+.B uninit_groups
+Allow the kernel to initialize bitmaps and inode tables and keep a high
+watermark for the unused inodes in a filesystem, to reduce
+.BR e2fsck (8)
+time.
 .RE
 .IP
 After setting or clearing 
-.B sparse_super
+.BR sparse_super , " uninit_groups" ,
 and 
 .B filetype 
 filesystem features,
@@ -414,7 +420,9 @@ can be run to convert existing directories to the hashed 
B-tree format.
 Linux kernels before 2.0.39 and many 2.1 series kernels do not support
 the filesystems that use any of these features.
 Enabling certain filesystem features may prevent the filesystem from
-being mounted by kernels which do not support those features.
+being mounted by kernels which do not support those features.  The
+.B uninit_groups
+feature is not yet supported by any released kernel.
 .TP
 .BI \-r " reserved-blocks-count"
 Set the number of reserved filesystem blocks.
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/13][e2fsprogs] Make tune2fs uninit block group aware.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Make tune2fs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 misc/tune2fs.c |   12 ++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 833b994..e2ebc08 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -98,7 +98,8 @@ static __u32 ok_features[3] = {
EXT3_FEATURE_COMPAT_HAS_JOURNAL |
EXT2_FEATURE_COMPAT_DIR_INDEX,  /* Compat */
EXT2_FEATURE_INCOMPAT_FILETYPE, /* Incompat */
-   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER /* R/O compat */
+   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER |   /* R/O compat */
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM
 };
 
 /*
@@ -213,6 +214,8 @@ static int release_blocks_proc(ext2_filsys fs, blk_t 
*blocknr,
ext2fs_unmark_block_bitmap(fs->block_map,block);
group = ext2fs_group_of_blk(fs, block);
fs->group_desc[group].bg_free_blocks_count++;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
fs->super->s_free_blocks_count++;
return 0;
 }
@@ -282,7 +285,7 @@ static void update_mntopts(ext2_filsys fs, char *mntopts)
 static void update_feature_set(ext2_filsys fs, char *features)
 {
int sparse, old_sparse, filetype, old_filetype;
-   int journal, old_journal, dxdir, old_dxdir;
+   int journal, old_journal, dxdir, old_dxdir, uninit, old_uninit;
struct ext2_super_block *sb= fs->super;
__u32   old_compat, old_incompat, old_ro_compat;
 
@@ -298,6 +301,8 @@ static void update_feature_set(ext2_filsys fs, char 
*features)
EXT3_FEATURE_COMPAT_HAS_JOURNAL;
old_dxdir = sb->s_feature_compat &
EXT2_FEATURE_COMPAT_DIR_INDEX;
+   old_uninit = sb->s_feature_ro_compat &
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM;
if (e2p_edit_feature(features, &sb->s_feature_compat,
 ok_features)) {
fprintf(stderr, _("Invalid filesystem option set: %s\n"),
@@ -312,6 +317,8 @@ static void update_feature_set(ext2_filsys fs, char 
*features)
EXT3_FEATURE_COMPAT_HAS_JOURNAL;
dxdir = sb->s_feature_compat &
EXT2_FEATURE_COMPAT_DIR_INDEX;
+   old_uninit = sb->s_feature_ro_compat &
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM;
if (old_journal && !journal) {
if ((mount_flags & EXT2_MF_MOUNTED) &&
!(mount_flags & EXT2_MF_READONLY)) {
@@ -358,6 +365,7 @@ static void update_feature_set(ext2_filsys fs, char 
*features)
 sb->s_feature_incompat))
ext2fs_update_dynamic_rev(fs);
if ((sparse != old_sparse) ||
+   (uninit != old_uninit) ||
(filetype != old_filetype)) {
sb->s_state &= ~EXT2_VALID_FS;
printf("\n%s\n", _(please_fsck));
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/13][e2fsprogs] Make dumpe2fs uninit block group aware.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Make dumpe2fs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 misc/dumpe2fs.c |   13 +
 1 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/misc/dumpe2fs.c b/misc/dumpe2fs.c
index d4d95bb..c08528a 100644
--- a/misc/dumpe2fs.c
+++ b/misc/dumpe2fs.c
@@ -112,7 +112,8 @@ static void print_bg_opts(ext2_filsys fs, dgrp_t i)
 {
int first = 1, bg_flags;
 
-   if (fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_LAZY_BG)
+   if (fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_LAZY_BG ||
+   fs->super->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_GDT_CSUM)
bg_flags = fs->group_desc[i].bg_flags;
else
bg_flags = 0;
@@ -210,11 +211,15 @@ static void list_desc (ext2_filsys fs)
diff = fs->group_desc[i].bg_inode_table - first_block;
if (diff > 0)
printf(" (+%ld)", diff);
-   printf (_("\n  %d free blocks, %d free inodes, "
- "%d directories\n"),
+   printf (_("\n  %u free blocks, %u free inodes, "
+ "%u directories%s"),
fs->group_desc[i].bg_free_blocks_count,
fs->group_desc[i].bg_free_inodes_count,
-   fs->group_desc[i].bg_used_dirs_count);
+   fs->group_desc[i].bg_used_dirs_count,
+   fs->group_desc[i].bg_itable_unused ? "" : "\n");
+   if (fs->group_desc[i].bg_itable_unused)
+   printf (_(", %u unused inodes\n"),
+   fs->group_desc[i].bg_itable_unused);
if (block_bitmap) {
fputs(_("  Free blocks: "), stdout);
ext2fs_get_block_bitmap_range(fs->block_map, 
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/13][e2fsprogs] Add support for creating filesystems using uninit block group.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Add support for creating filesystems using uninit block group.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 misc/mke2fs.c |   44 
 1 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 4a6cace..8360c51 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -432,6 +432,8 @@ static void write_inode_tables(ext2_filsys fs)
num, blk, error_message(retval));
exit(1);
}
+   /* The kernel doesn't need to zero the itable blocks */
+   fs->group_desc[i].bg_flags |= EXT2_BG_INODE_ZEROED;
}
if (sync_kludge) {
if (sync_kludge == 1)
@@ -447,34 +449,49 @@ static void write_inode_tables(ext2_filsys fs)
 static void setup_lazy_bg(ext2_filsys fs)
 {
dgrp_t i;
-   int blks;
+   int blks, csum_flag;
struct ext2_super_block *sb = fs->super;
struct ext2_group_desc *bg = fs->group_desc;
 
-   if (EXT2_HAS_COMPAT_FEATURE(fs->super, 
-   EXT2_FEATURE_COMPAT_LAZY_BG)) {
+   csum_flag = EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+  EXT4_FEATURE_RO_COMPAT_GDT_CSUM);
+   if (EXT2_HAS_COMPAT_FEATURE(fs->super, EXT2_FEATURE_COMPAT_LAZY_BG) ||
+   csum_flag) {
for (i = 0; i < fs->group_desc_count; i++, bg++) {
if ((i == 0) ||
-   (i == fs->group_desc_count-1))
+   (i == fs->group_desc_count - 1 && !csum_flag))
continue;
if (bg->bg_free_inodes_count ==
sb->s_inodes_per_group) {
-   bg->bg_free_inodes_count = 0;
bg->bg_flags |= EXT2_BG_INODE_UNINIT;
-   sb->s_free_inodes_count -= 
-   sb->s_inodes_per_group;
+   if (!csum_flag) {
+   bg->bg_free_inodes_count = 0;
+   sb->s_free_inodes_count -=
+   sb->s_inodes_per_group;
+   }
}
+
+   /* Skip groups with GDT backups because the resize
+* inode has blocks allocated in them, and the last
+* group because it needs block bitmap padding. */
+   if ((ext2fs_bg_has_super(fs, i) &&
+sb->s_reserved_gdt_blocks) ||
+   i == fs->group_desc_count - 1)
+   continue;
+
blks = ext2fs_super_and_bgd_loc(fs, i, 0, 0, 0, 0);
-   if (bg->bg_free_blocks_count == blks) {
-   bg->bg_free_blocks_count = 0;
+   if (bg->bg_free_blocks_count == blks &&
+   bg->bg_flags & EXT2_BG_INODE_UNINIT) {
bg->bg_flags |= EXT2_BG_BLOCK_UNINIT;
-   sb->s_free_blocks_count -= blks;
+   if (!csum_flag) {
+   bg->bg_free_blocks_count = 0;
+   sb->s_free_blocks_count -= blks;
+   }
}
}
}
 }
 
-
 static void create_root_dir(ext2_filsys fs)
 {
errcode_t   retval;
@@ -874,7 +891,8 @@ static __u32 ok_features[3] = {
EXT2_FEATURE_INCOMPAT_FILETYPE| /* Incompat */
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|
EXT2_FEATURE_INCOMPAT_META_BG,
-   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER /* R/O compat */
+   EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|/* R/O compat */
+   EXT4_FEATURE_RO_COMPAT_GDT_CSUM
 };
 
 
@@ -1750,6 +1768,8 @@ int main (int argc, char *argv[])
}
 no_journal:
 
+   if (!super_only)
+   ext2fs_set_gdt_csum(fs);
if (!quiet)
printf(_("Writing superblocks and "
   "filesystem accounting information: "));
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/13][e2fsprogs] Make resize2fs uninit block group aware.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Make resize2fs uninit block group aware.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 resize/main.c  |7 +++
 resize/resize2fs.c |   29 -
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/resize/main.c b/resize/main.c
index 7db4ebc..7c1d0c1 100644
--- a/resize/main.c
+++ b/resize/main.c
@@ -298,6 +298,13 @@ int main (int argc, char ** argv)
printf (_("Couldn't find valid filesystem superblock.\n"));
exit (1);
}
+
+   if (fs->super->s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_GDT_CSUM) {
+   com_err(program_name, EXT2_ET_RO_UNSUPP_FEATURE,
+   ":- uninit_groups");
+   exit(1);
+   }
+
/*
 * Check for compatibility with the feature sets.  We need to
 * be more stringent than ext2fs_open().
diff --git a/resize/resize2fs.c b/resize/resize2fs.c
index 0d6a082..ce0111c 100644
--- a/resize/resize2fs.c
+++ b/resize/resize2fs.c
@@ -339,7 +339,9 @@ retry:
numblocks = fs->super->s_blocks_per_group;
i = old_fs->group_desc_count - 1;
fs->group_desc[i].bg_free_blocks_count += (numblocks-old_numblocks);
-   
+   fs->group_desc[i].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, i, &fs->group_desc[i]);
+
/*
 * If the number of block groups is staying the same, we're
 * done and can exit now.  (If the number block groups is
@@ -415,6 +417,8 @@ retry:
fs->group_desc[i].bg_free_inodes_count =
fs->super->s_inodes_per_group;
fs->group_desc[i].bg_used_dirs_count = 0;
+   fs->group_desc[i].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, i,&fs->group_desc[i]);
 
retval = ext2fs_allocate_group_table(fs, i, 0);
if (retval) goto errout;
@@ -1223,9 +1227,13 @@ static errcode_t inode_scan_and_fix(ext2_resize_t rfs)
if (retval) goto errout;
 
group = (new_inode-1) / 
EXT2_INODES_PER_GROUP(rfs->new_fs->super);
-   if (LINUX_S_ISDIR(inode.i_mode))
+   if (LINUX_S_ISDIR(inode.i_mode)) {
rfs->new_fs->group_desc[group].bg_used_dirs_count++;
-   
+   rfs->new_fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(rfs->new_fs->super,group,
+  &rfs->new_fs->group_desc[group]);
+   }
+
 #ifdef RESIZE2FS_DEBUG
if (rfs->flags & RESIZE_DEBUG_INODEMAP)
printf("Inode moved %u->%u\n", ino, new_inode);
@@ -1478,6 +1486,9 @@ static errcode_t move_itables(ext2_resize_t rfs)
ext2fs_unmark_block_bitmap(fs->block_map, blk);
 
rfs->old_fs->group_desc[i].bg_inode_table = new_blk;
+   rfs->old_fs->group_desc[i].bg_checksum =
+   ext2fs_group_desc_csum(rfs->old_fs->super, i,
+  &rfs->old_fs->group_desc[i]);
ext2fs_mark_super_dirty(rfs->old_fs);
ext2fs_flush(rfs->old_fs);
 
@@ -1575,8 +1586,12 @@ static errcode_t 
ext2fs_calculate_summary_stats(ext2_filsys fs)
count++;
if ((count == fs->super->s_blocks_per_group) ||
(blk == fs->super->s_blocks_count-1)) {
-   fs->group_desc[group++].bg_free_blocks_count =
+   fs->group_desc[group].bg_free_blocks_count =
group_free;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,
+  &fs->group_desc[group]);
+   group++;
count = 0;
group_free = 0;
}
@@ -1600,8 +1615,12 @@ static errcode_t 
ext2fs_calculate_summary_stats(ext2_filsys fs)
count++;
if ((count == fs->super->s_inodes_per_group) ||
(ino == fs->super->s_inodes_count)) {
-   fs->group_desc[group++].bg_free_inodes_count =
+   fs->group_desc[group].bg_free_inodes_count =
group_free;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,
+  &fs->group_desc[group]);
+   group++;
count = 0;
group_free = 0;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/13][e2fsprogs] Uninit block group break down

2007-10-11 Thread Jose R. Santos

The Uninit block group patch is painful to review since it's quite a
large patch.  Since simple patches are easier to review, I decided to
break down the patch into smaller logical pieces that should be easier
to review, clean and fix.
 
The patch is based on the latest e2fsprogs git tree and, aside from a
compile test (which fails on PPC64 because of missing
ext2fs_swab_group_desc()), I have not fully tested these patches.  My
intent here is to get some of the backlog of patches in better shape
for eventual inclusion into Ted's tree.

Aside from the breakdown, this is essentially the same patch that
Avantika submitted to the mailing list a couple of weeks ago, with some
minor changes to allow it to apply on the latest git tree.  Let me
know what you folks think.
 
-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/13][e2fsprogs] Add uninit block group support on libe2fs.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Add uninit block group support on libe2fs.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/alloc_stats.c  |   25 +
 lib/ext2fs/alloc_tables.c |5 ++---
 lib/ext2fs/ext2_fs.h  |1 +
 lib/ext2fs/ext2fs.h   |4 +++-
 lib/ext2fs/initialize.c   |2 ++
 lib/ext2fs/inode.c|   29 +++--
 lib/ext2fs/openfs.c   |   16 
 lib/ext2fs/rw_bitmaps.c   |   14 ++
 8 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/lib/ext2fs/alloc_stats.c b/lib/ext2fs/alloc_stats.c
index 4088f7b..ee4a1e4 100644
--- a/lib/ext2fs/alloc_stats.c
+++ b/lib/ext2fs/alloc_stats.c
@@ -27,6 +27,27 @@ void ext2fs_inode_alloc_stats2(ext2_filsys fs, ext2_ino_t 
ino,
fs->group_desc[group].bg_free_inodes_count -= inuse;
if (isdir)
fs->group_desc[group].bg_used_dirs_count += inuse;
+
+   /* We don't strictly need to be clearing these if inuse < 0
+* (i.e. freeing inodes) but it also means something is bad. */
+   fs->group_desc[group].bg_flags &= ~(EXT2_BG_INODE_UNINIT |
+   EXT2_BG_BLOCK_UNINIT);
+   if (EXT2_HAS_RO_COMPAT_FEATURE(fs->super,
+  EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+   ext2_ino_t first_unused_inode = fs->super->s_inodes_per_group -
+   fs->group_desc[group].bg_itable_unused +
+   group * fs->super->s_inodes_per_group + 1;
+
+   if (ino >= first_unused_inode)
+   fs->group_desc[group].bg_itable_unused =
+   group * fs->super->s_inodes_per_group +
+   fs->super->s_inodes_per_group - ino;
+
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,
+  &fs->group_desc[group]);
+   }
+
fs->super->s_free_inodes_count -= inuse;
ext2fs_mark_super_dirty(fs);
ext2fs_mark_ib_dirty(fs);
@@ -46,6 +67,10 @@ void ext2fs_block_alloc_stats(ext2_filsys fs, blk_t blk, int 
inuse)
else
ext2fs_unmark_block_bitmap(fs->block_map, blk);
fs->group_desc[group].bg_free_blocks_count -= inuse;
+   fs->group_desc[group].bg_flags &= ~EXT2_BG_BLOCK_UNINIT;
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
+
fs->super->s_free_blocks_count -= inuse;
ext2fs_mark_super_dirty(fs);
ext2fs_mark_bb_dirty(fs);
diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..290e54b 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -95,13 +95,12 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
ext2fs_mark_block_bitmap(bmap, blk);
fs->group_desc[group].bg_inode_table = new_blk;
}
+   fs->group_desc[group].bg_checksum =
+   ext2fs_group_desc_csum(fs->super, group,&fs->group_desc[group]);
 
-   
return 0;
 }
 
-   
-
 errcode_t ext2fs_allocate_tables(ext2_filsys fs)
 {
errcode_t   retval;
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index a316665..7b63828 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -173,6 +173,7 @@ struct ext4_group_desc
 
 #define EXT2_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not initialized */
 #define EXT2_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not initialized */
+#define EXT2_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
 
 /*
  * Data structures used by the directory indexing feature
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 1267ee8..c47536b 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -321,6 +321,7 @@ typedef struct ext2_struct_inode_scan *ext2_inode_scan;
 #define EXT2_SF_BAD_EXTRA_BYTES0x0004
 #define EXT2_SF_SKIP_MISSING_ITABLE0x0008
 #define EXT2_SF_DO_LAZY0x0010
+#define EXT2_SF_DO_CSUM0x0020
 
 /*
  * ext2fs_check_if_mounted flags
@@ -440,7 +441,8 @@ typedef struct ext2_icount *ext2_icount_t;
 EXT3_FEATURE_INCOMPAT_RECOVER)
 #endif
 #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP
(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
-EXT2_FEATURE_RO_COMPAT_LARGE_FILE)
+EXT2_FEATURE_RO_COMPAT_LARGE_FILE|\
+EXT4_FEATURE_RO_COMPAT_GDT_CSUM)
 
 /*
  * These features are only allowed if EXT2_FLAG_SOFTSUPP_FEATURES is passed
diff --git a/lib/ext2fs/initialize.c b/lib/ext2fs/initialize.c
ind

[PATCH 03/13][e2fsprogs] Rename feature name from gdt_checksum to uninit_groups.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Rename feature name from gdt_checksum to uninit_groups.

This name is a more intuitive option when running mke2fs.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/e2p/feature.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index fe7e65a..7c25736 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -45,7 +45,7 @@ static struct feature feature_list[] = {
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_HUGE_FILE,
"huge_file" },
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_GDT_CSUM,
-   "gdt_checksum" },
+   "uninit_groups" },
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_DIR_NLINK,
"dir_nlink" },
{   E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE,
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/13][e2fsprogs] Add initial checksum support.

2007-10-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Add initial checksum support.

- Add support for computing CRC-16 value.
- Add call to check/verify/set csum on block_groups.
- Add a test program to verify csum operations. 

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/Makefile.in |   23 ++-
 lib/ext2fs/crc16.c |   60 +++
 lib/ext2fs/crc16.h |   29 +
 lib/ext2fs/csum.c  |  149 
 lib/ext2fs/ext2fs.h|7 ++
 lib/ext2fs/tst_csum.c  |  113 
 6 files changed, 378 insertions(+), 3 deletions(-)

diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index 03ce131..847fe23 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -66,7 +66,9 @@ OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) 
$(E2IMAGE_LIB_OBJS) \
unix_io.o \
unlink.o \
valid_blk.o \
-   version.o
+   version.o \
+   crc16.o \
+   csum.o
 
 SRCS= ext2_err.c \
$(srcdir)/alloc.c \
@@ -132,7 +134,10 @@ SRCS= ext2_err.c \
$(srcdir)/tst_bitops.c \
$(srcdir)/tst_byteswap.c \
$(srcdir)/tst_getsize.c \
-   $(srcdir)/tst_iscan.c
+   $(srcdir)/tst_iscan.c \
+   $(srcdir)/tst_csum.c \
+   $(srcdir)/crc16.c \
+   $(srcdir)/csum.c
 
 HFILES= bitops.h ext2fs.h ext2_io.h ext2_fs.h ext2_ext_attr.h ext3_extents.h \
tdb.h
@@ -239,17 +244,23 @@ ext2_tdbtool: tdbtool.o
@echo " LD $@"
@$(CC) -o ext2_tdbtool tdbtool.o tdb.o
 
+tst_csum: tst_csum.o csum.o crc16.o $(STATIC_LIBEXT2FS)
+   @echo " LD $@"
+   @$(CC) -o tst_csum csum.o tst_csum.o crc16.o $(STATIC_LIBEXT2FS) \
+   $(LIBCOM_ERR)
+
 mkjournal: mkjournal.c $(STATIC_LIBEXT2FS)
@echo " LD $@"
@$(CC) -o mkjournal $(srcdir)/mkjournal.c -DDEBUG $(STATIC_LIBEXT2FS) 
$(LIBCOM_ERR) $(ALL_CFLAGS)
 
-check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount tst_super_size
+check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount tst_super_size 
tst_types tst_csum
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_bitops
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_badblocks
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_iscan
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_types
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_icount
LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_super_size
+   LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_csum
 
 installdirs::
@echo " MKINSTALLDIRS $(libdir) $(includedir)/ext2fs"
@@ -357,6 +368,10 @@ closefs.o: $(srcdir)/closefs.c $(srcdir)/ext2_fs.h \
  $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h $(srcdir)/ext3_extents.h \
  $(top_srcdir)/lib/et/com_err.h $(srcdir)/ext2_io.h \
  $(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
+crc16.o: $(srcdir)/crc16.c $(srcdir)/ext2_fs.h $(srcdir)/crc16.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h
+csum.o: $(srcdir)/csum.c $(srcdir)/ext2_fs.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h
 dblist.o: $(srcdir)/dblist.c $(srcdir)/ext2_fs.h \
  $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fsP.h \
  $(srcdir)/ext2fs.h $(srcdir)/ext2_fs.h $(srcdir)/ext3_extents.h \
@@ -571,3 +586,5 @@ tst_iscan.o: $(srcdir)/tst_iscan.c $(srcdir)/ext2_fs.h \
  $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
  $(srcdir)/ext2_fs.h $(srcdir)/ext3_extents.h $(top_srcdir)/lib/et/com_err.h \
  $(srcdir)/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
+tst_csum.o: $(srcdir)/tst_csum.c $(srcdir)/ext2_fs.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h
diff --git a/lib/ext2fs/crc16.c b/lib/ext2fs/crc16.c
new file mode 100644
index 0000000..c3d07e1
--- /dev/null
+++ b/lib/ext2fs/crc16.c
@@ -0,0 +1,60 @@
+/*
+ *  crc16.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include 
+#include "crc16.h"
+
+/** CRC table for the CRC-16. The poly is 0x8005 (x16 + x15 + x2 + 1) */
+uint16_t const crc16_table[256] = {
+   0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
+   0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
+   0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
+   0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
+   0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
+   0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
+   0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
+   0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
+   0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
+   0x3600, 0xF6C1, 0xF781, 

[PATCH] JBD2: debug code cleanup.

2007-09-27 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

JBD2: debug code cleanup.

Mostly stolen from akpm's JBD cleanup patch.

- use `#ifdef foo' instead of `#if defined(foo)'

- Make journal_enable_debug __read_mostly just for the heck of it

- Make jbd_debugfs_dir and jbd_debug static

- debugfs_remove(NULL) is legal: remove unneeded tests

- remove unnecessary empty loops

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 fs/jbd2/journal.c |   20 ++--
 1 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f37324a..9a7187f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1953,16 +1953,14 @@ void jbd2_journal_put_journal_head(struct journal_head 
*jh)
 /*
  * debugfs tunables
  */
-#if defined(CONFIG_JBD2_DEBUG)
-u8 jbd2_journal_enable_debug;
+#ifdef CONFIG_JBD2_DEBUG
+u8 jbd2_journal_enable_debug __read_mostly;
 EXPORT_SYMBOL(jbd2_journal_enable_debug);
-#endif
-
-#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
 
 #define JBD2_DEBUG_NAME "jbd2-debug"
 
-struct dentry *jbd2_debugfs_dir, *jbd2_debug;
+static struct dentry *jbd2_debugfs_dir;
+static struct dentry *jbd2_debug;
 
 static void __init jbd2_create_debugfs_entry(void)
 {
@@ -1975,24 +1973,18 @@ static void __init jbd2_create_debugfs_entry(void)
 
 static void __exit jbd2_remove_debugfs_entry(void)
 {
-   if (jbd2_debug)
-   debugfs_remove(jbd2_debug);
-   if (jbd2_debugfs_dir)
-   debugfs_remove(jbd2_debugfs_dir);
+   debugfs_remove(jbd2_debug);
+   debugfs_remove(jbd2_debugfs_dir);
 }
 
 #else
 
 static void __init jbd2_create_debugfs_entry(void)
 {
-   do {
-   } while (0);
 }
 
 static void __exit jbd2_remove_debugfs_entry(void)
 {
-   do {
-   } while (0);
 }
 
 #endif
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: jbd : config_jbd_debug cannot create /proc entry

2007-09-26 Thread Jose R. Santos
On Wed, 26 Sep 2007 14:35:39 -0700
Andrew Morton <[EMAIL PROTECTED]> wrote:

> On Tue, 25 Sep 2007 16:36:08 +0200
> Jan Kara <[EMAIL PROTECTED]> wrote:
> 
> > > On Tue, 25 Sep 2007 07:49:38 -0500
> > > "Jose R. Santos" <[EMAIL PROTECTED]> wrote:
> > > 
> > > > On Tue, 25 Sep 2007 13:50:46 +0200
> > > > Jan Kara <[EMAIL PROTECTED]> wrote:
> > > > > > Jan Kara wrote:
> > > > > > >>
> > > > > > >-#define create_jbd_proc_entry() do {} while (0)
> > > > > > >-#define remove_jbd_proc_entry() do {} while (0)
> > > > > > >+static ctl_table fs_table[] = {
> > > > > > >+  {
> > > > > > >+.ctl_name   = -1, /* Don't want it */
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > shouldn't this be CTL_UNNUMBERED ?
> > > > >   Oh, it should be. I didn't notice we have this :) Thanks for 
> > > > > notifying
> > > > > me. Attached is a fixed version.
> > > > 
> > > > This was fixed in JBD2 by moving the jbd-debug file to debugfs:
> > > > http://lkml.org/lkml/2007/7/11/334
> > > > 
> > > > Since this code is already in the kernel, we should keep it consistent. 
> > > > 
> > > 
> > > OK.  Here's a quick patch to fix this.  Adapted from the JBD2 patch.
> > > Let me know what you think.
> >   Looks fine - exactly what I've just done here :).
> 
> hm.  I found rather a lot of issues.  If this patch is derived from the
> JBD2 patch then perhaps the JBD2 patch needs some looking at.

Some of the changes do apply to the JBD2 patch.  I'll send a cleanup patch.

> 
> > > Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
> >   You can add Signed-off-by: Jan Kara <[EMAIL PROTECTED]>
> 
> I suspect you might be getting your signed-off-bys and acked-bys mixed up. 
> (If not this patch, then the previous one).  Please see
> Documentation/SubmittingPatches section 13 for the difference.
> 
> Jose, please review and if possible runtime test these proposed changes?

Agree with all the changes and they worked as expected on my system. 

> From: Andrew Morton <[EMAIL PROTECTED]>
> 
> - use `#ifdef foo' instead of `#if defined(foo)'
> 
> - CONFIG_JBD_DEBUG depends on CONFIG_DEBUG_FS so we don't need to duplicate
>   that logic in the .c file ifdefs
> 
> - Make journal_enable_debug __read_mostly just for the heck of it
> 
> - Make jbd_debugfs_dir and jbd_debug static
> 
> - debugfs_remove(NULL) is legal: remove unneeded tests
> 
> - jbd_create_debugfs_entry is a better name than create_jbd_debugfs_entry
> 
> - ditto remove_jbd_debugfs_entry
> 
> - C functions are preferred over macros
> 
> Cc: "Jose R. Santos" <[EMAIL PROTECTED]>
> Cc: 
> Cc: Jan Kara <[EMAIL PROTECTED]>
> Cc: Jose R. Santos <[EMAIL PROTECTED]>
> Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>

Acked-by: Jose R. Santos <[EMAIL PROTECTED]>

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] JBD: Fix JBD warnings when compiling with CONFIG_JBD_DEBUG

2007-09-25 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

JBD: Fix JBD warnings when compiling with CONFIG_JBD_DEBUG

Note from Mingming's JBD2 fix:

Noticed that all the warnings occur when the debug level is 0. Then found the
"jbd2: Move jbd2-debug file to debugfs" patch
http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=0f49d5d019afa4e94253bfc92f0daca3badb990b

changed jbd2_journal_enable_debug from int type to u8, which makes the
jbd_debug comparison always true when the debugging level is 0. Thus
the compile warning occurs.

Thought about changing the jbd2_journal_enable_debug data type back to
int, but can't, because the jbd2-debug is moved to debug fs, where
calling debugfs_create_u8() to create the debugfs entry needs the value
to be u8 type.

Even if we changed the data type back to int, the code would still be buggy:
the kernel should not print jbd2 debug messages if
jbd2_journal_enable_debug is set to 0, but currently it does.

The fix is to change the level of debugging to 1. The same should be fixed in
ext3/JBD, but currently ext3 jbd-debug via /proc fs is broken, so we
probably should fix it all together.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 fs/ext3/inode.c   |2 +-
 fs/jbd/recovery.c |6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index de4e316..f5b0e79 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2887,7 +2887,7 @@ int ext3_write_inode(struct inode *inode, int wait)
return 0;
 
if (ext3_journal_current_handle()) {
-   jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+   jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
dump_stack();
return -EIO;
}
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 2a5f4b8..c5d9694 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -250,10 +250,10 @@ int journal_recover(journal_t *journal)
if (!err)
err = do_one_pass(journal, &info, PASS_REPLAY);
 
-   jbd_debug(0, "JBD: recovery, exit status %d, "
+   jbd_debug(1, "JBD: recovery, exit status %d, "
  "recovered transactions %u to %u\n",
  err, info.start_transaction, info.end_transaction);
-   jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+   jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
/* Restart the log at the next transaction ID, thus invalidating
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
 #ifdef CONFIG_JBD_DEBUG
int dropped = info.end_transaction - 
be32_to_cpu(sb->s_sequence);
 #endif
-   jbd_debug(0,
+   jbd_debug(1,
  "JBD: ignoring %d transaction%s from the journal.\n",
  dropped, (dropped == 1) ? "" : "s");
journal->j_transaction_sequence = ++info.end_transaction;
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] JBD: Export jbd-debug via debugfs

2007-09-25 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

JBD: Export jbd-debug via debugfs

The jbd-debug file used to be located in /proc/sys/fs/jbd-debug, but
create_proc_entry() does not do lookups on file names that are more than one
directory deep.  This causes the entry creation to fail and hence, no proc
file is created.

Instead of fixing this in procfs, we might as well move the jbd-debug file to
debugfs, which would be the preferred location for this kind of tunable.  The
new location is now /sys/kernel/debug/jbd/jbd-debug.


Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
Signed-off-by: Jan Kara <[EMAIL PROTECTED]>
--

 fs/Kconfig  |   10 
 fs/jbd/journal.c|   65 ---
 include/linux/jbd.h |2 +-
 3 files changed, 31 insertions(+), 46 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 58a0650..a8937a6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -219,7 +219,7 @@ config JBD
 
 config JBD_DEBUG
bool "JBD (ext3) debugging support"
-   depends on JBD
+   depends on JBD && DEBUG_FS
help
  If you are using the ext3 journaled file system (or potentially any
  other file system/device using JBD), this option allows you to
@@ -228,10 +228,10 @@ config JBD_DEBUG
  debugging output will be turned off.
 
  If you select Y here, then you will be able to turn on debugging
- with "echo N > /proc/sys/fs/jbd-debug", where N is a number between
- 1 and 5, the higher the number, the more debugging output is
- generated.  To turn debugging off again, do
- "echo 0 > /proc/sys/fs/jbd-debug".
+ with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a 
+ number between 1 and 5, the higher the number, the more debugging 
+ output is generated.  To turn debugging off again, do
+ "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
 
 config JBD2
tristate
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06ab3c1..1fb59fa 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1939,64 +1940,48 @@ void journal_put_journal_head(struct journal_head *jh)
 }
 
 /*
- * /proc tunables
+ * debugfs tunables
  */
 #if defined(CONFIG_JBD_DEBUG)
-int journal_enable_debug;
+u8 journal_enable_debug;
 EXPORT_SYMBOL(journal_enable_debug);
 #endif
 
-#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
+#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_DEBUG_FS)
 
-static struct proc_dir_entry *proc_jbd_debug;
+struct dentry  *jbd_debugfs_dir, *jbd_debug;
 
-static int read_jbd_debug(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static void __init create_jbd_debugfs_entry(void)
 {
-   int ret;
-
-   ret = sprintf(page + off, "%d\n", journal_enable_debug);
-   *eof = 1;
-   return ret;
+   jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
+   if (jbd_debugfs_dir)
+   jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
+  jbd_debugfs_dir,
+  &journal_enable_debug);
 }
 
-static int write_jbd_debug(struct file *file, const char __user *buffer,
-  unsigned long count, void *data)
+static void __exit remove_jbd_debugfs_entry(void)
 {
-   char buf[32];
-
-   if (count > ARRAY_SIZE(buf) - 1)
-   count = ARRAY_SIZE(buf) - 1;
-   if (copy_from_user(buf, buffer, count))
-   return -EFAULT;
-   buf[ARRAY_SIZE(buf) - 1] = '\0';
-   journal_enable_debug = simple_strtoul(buf, NULL, 10);
-   return count;
+   if (jbd_debug)
+   debugfs_remove(jbd_debug);
+   if (jbd_debugfs_dir)
+   debugfs_remove(jbd_debugfs_dir);
 }
 
-#define JBD_PROC_NAME "sys/fs/jbd-debug"
+#else
 
-static void __init create_jbd_proc_entry(void)
+static void __init create_jbd_debugfs_entry(void)
 {
-   proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-   if (proc_jbd_debug) {
-   /* Why is this so hard? */
-   proc_jbd_debug->read_proc = read_jbd_debug;
-   proc_jbd_debug->write_proc = write_jbd_debug;
-   }
+   do {
+   } while (0);
 }
 
-static void __exit remove_jbd_proc_entry(void)
+static void __exit remove_jbd_debugfs_entry(void)
 {
-   if (proc_jbd_debug)
-   remove_proc_entry(JBD_PROC_NAME, NULL);
+   do {
+   } while (0);
 }
 
-#else
-
-#define create_jbd_proc_entry() do {} while (0)
-#define remove_jbd_proc_entry() do {} while (0)
-
 #endif
 
 struct kmem_cache *jbd_handle_cache;
@@ -2054,7 +2039,7 @@ static int __init journal_init(void)
ret = journal

Re: jbd : config_jbd_debug cannot create /proc entry

2007-09-25 Thread Jose R. Santos
On Tue, 25 Sep 2007 07:49:38 -0500
"Jose R. Santos" <[EMAIL PROTECTED]> wrote:

> On Tue, 25 Sep 2007 13:50:46 +0200
> Jan Kara <[EMAIL PROTECTED]> wrote:
> > > Jan Kara wrote:
> > > >>
> > > >-#define create_jbd_proc_entry() do {} while (0)
> > > >-#define remove_jbd_proc_entry() do {} while (0)
> > > >+static ctl_table fs_table[] = {
> > > >+{
> > > >+.ctl_name   = -1,   /* Don't want it */
> > > 
> > > 
> > > 
> > > shouldn't this be CTL_UNNUMBERED ?
> >   Oh, it should be. I didn't notice we have this :) Thanks for notifying
> > me. Attached is a fixed version.
> 
> This was fixed in JBD2 by moving the jbd-debug file to debugfs:
> http://lkml.org/lkml/2007/7/11/334
> 
> Since this code is already in the kernel, we should keep it consistent. 
> 

OK.  Here's a quick patch to fix this.  Adapted from the JBD2 patch.
Let me know what you think.

-JRS

commit 6cbd2ce05b7504514707ce825170a5d77abf6a6e
Author: root <[EMAIL PROTECTED]>
Date:   Thu Jun 14 09:40:09 2007 -0500

The jbd-debug file used to be located in /proc/sys/fs/jbd-debug, but
create_proc_entry() does not do lookups on file names that are more than one
directory deep.  This causes the entry creation to fail and hence, no proc
file is created.

Instead of fixing this on procfs, we might as well move the jbd-debug file to
debugfs, which would be the preferred location for this kind of tunable.  The
new location is now /sys/kernel/debug/jbd/jbd-debug.


Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>

diff --git a/fs/Kconfig b/fs/Kconfig
index 58a0650..a8937a6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -219,7 +219,7 @@ config JBD
 
 config JBD_DEBUG
bool "JBD (ext3) debugging support"
-   depends on JBD
+   depends on JBD && DEBUG_FS
help
  If you are using the ext3 journaled file system (or potentially any
  other file system/device using JBD), this option allows you to
@@ -228,10 +228,10 @@ config JBD_DEBUG
  debugging output will be turned off.
 
  If you select Y here, then you will be able to turn on debugging
- with "echo N > /proc/sys/fs/jbd-debug", where N is a number between
- 1 and 5, the higher the number, the more debugging output is
- generated.  To turn debugging off again, do
- "echo 0 > /proc/sys/fs/jbd-debug".
+ with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a 
+ number between 1 and 5, the higher the number, the more debugging 
+ output is generated.  To turn debugging off again, do
+ "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
 
 config JBD2
tristate
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06ab3c1..3cad624 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1939,63 +1940,38 @@ void journal_put_journal_head(struct journal_head *jh)
 }
 
 /*
- * /proc tunables
+ * debugfs tunables
  */
 #if defined(CONFIG_JBD_DEBUG)
-int journal_enable_debug;
+u8 journal_enable_debug;
 EXPORT_SYMBOL(journal_enable_debug);
 #endif
 
-#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
+#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_DEBUG_FS)
 
-static struct proc_dir_entry *proc_jbd_debug;
+struct dentry  *jbd_debugfs_dir, *jbd_debug;
 
-static int read_jbd_debug(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static void __init create_jbd_debugfs_entry(void)
 {
-   int ret;
-
-   ret = sprintf(page + off, "%d\n", journal_enable_debug);
-   *eof = 1;
-   return ret;
-}
-
-static int write_jbd_debug(struct file *file, const char __user *buffer,
-  unsigned long count, void *data)
-{
-   char buf[32];
-
-   if (count > ARRAY_SIZE(buf) - 1)
-   count = ARRAY_SIZE(buf) - 1;
-   if (copy_from_user(buf, buffer, count))
-   return -EFAULT;
-   buf[ARRAY_SIZE(buf) - 1] = '\0';
-   journal_enable_debug = simple_strtoul(buf, NULL, 10);
-   return count;
-}
-
-#define JBD_PROC_NAME "sys/fs/jbd-debug"
-
-static void __init create_jbd_proc_entry(void)
-{
-   proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-   if (proc_jbd_debug) {
-   /* Why is this so hard? */
-   proc_jbd_debug->read_proc = read_jbd_debug;
-   proc_jbd_debug->write_proc = write_jbd_debug;
-   }
+   jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
+   if (jbd_debugfs_dir)
+   jbd_debug = debugfs_create_u8("jbd-deb

Re: jbd : config_jbd_debug cannot create /proc entry

2007-09-25 Thread Jose R. Santos
On Tue, 25 Sep 2007 13:50:46 +0200
Jan Kara <[EMAIL PROTECTED]> wrote:
> > Jan Kara wrote:
> > >>
> > >-#define create_jbd_proc_entry() do {} while (0)
> > >-#define remove_jbd_proc_entry() do {} while (0)
> > >+static ctl_table fs_table[] = {
> > >+  {
> > >+.ctl_name   = -1, /* Don't want it */
> > 
> > 
> > 
> > shouldn't this be CTL_UNNUMBERED ?
>   Oh, it should be. I didn't notice we have this :) Thanks for notifying
> me. Attached is a fixed version.

This was fixed in JBD2 by moving the jbd-debug file to debugfs:
http://lkml.org/lkml/2007/7/11/334

Since this code is already in the kernel, we should keep it consistent. 

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ext4: FLEX_BG Kernel support v2.

2007-09-21 Thread Jose R. Santos
On Fri, 21 Sep 2007 13:29:27 -0700
Badari Pulavarty <[EMAIL PROTECTED]> wrote:

> On Fri, 2007-09-21 at 09:06 -0500, Jose R. Santos wrote:
> > From: Jose R. Santos <[EMAIL PROTECTED]>
> > 
> > ext4: FLEX_BG Kernel support v2.
> > 
> 
> > @@ -702,13 +702,15 @@ static inline int ext4_valid_inum(struct super_block 
> > *sb, unsigned long ino)
> >  #define EXT4_FEATURE_INCOMPAT_META_BG  0x0010
> >  #define EXT4_FEATURE_INCOMPAT_EXTENTS  0x0040 /* extents 
> > support */
> >  #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
> > +#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
> 
> Any reason why 0x100 is skipped ?
> 
> Thanks,
> Badari
> 

Because 0x0100 is reserved for EXT4_FEATURE_INCOMPAT_MMP in e2fsprogs.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ext4: FLEX_BG Kernel support v2.

2007-09-21 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

ext4: FLEX_BG Kernel support v2.

This feature relaxes check restrictions on where each block group's meta-data is
located within the storage media.  This allows for the allocation of bitmaps or
inode tables outside the block group boundaries in cases where bad blocks force
us to look for new blocks which the owning block group cannot satisfy.  This
will also allow for new meta-data allocation schemes to improve performance and
scalability.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 fs/ext4/super.c |9 +++--
 include/linux/ext4_fs.h |4 +++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4550b83..dbce81d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1248,13 +1248,17 @@ static int ext4_check_descriptors (struct super_block * 
sb)
ext4_fsblk_t inode_table;
struct ext4_group_desc * gdp = NULL;
int desc_block = 0;
+   int flexbg_flag = 0;
int i;
 
+   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+   flexbg_flag = 1;
+
ext4_debug ("Checking group descriptors");
 
for (i = 0; i < sbi->s_groups_count; i++)
{
-   if (i == sbi->s_groups_count - 1)
+   if (i == sbi->s_groups_count - 1 || flexbg_flag)
last_block = ext4_blocks_count(sbi->s_es) - 1;
else
last_block = first_block +
@@ -1291,7 +1295,8 @@ static int ext4_check_descriptors (struct super_block * 
sb)
i, inode_table);
return 0;
}
-   first_block += EXT4_BLOCKS_PER_GROUP(sb);
+   if (!flexbg_flag)
+   first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index cdee7aa..d53e167 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -702,13 +702,15 @@ static inline int ext4_valid_inum(struct super_block *sb, 
unsigned long ino)
 #define EXT4_FEATURE_INCOMPAT_META_BG  0x0010
 #define EXT4_FEATURE_INCOMPAT_EXTENTS  0x0040 /* extents support */
 #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
 
 #define EXT4_FEATURE_COMPAT_SUPP   EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
 EXT4_FEATURE_INCOMPAT_RECOVER| \
 EXT4_FEATURE_INCOMPAT_META_BG| \
 EXT4_FEATURE_INCOMPAT_EXTENTS| \
-EXT4_FEATURE_INCOMPAT_64BIT)
+EXT4_FEATURE_INCOMPAT_64BIT| \
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #define EXT4_FEATURE_RO_COMPAT_SUPP(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Ext4 devel interlock meeting minutes (Sep 10, 2007)

2007-09-13 Thread Jose R. Santos
Attendees: Dave Kleikamp, Andreas Dilger, Jose Santos, Aneesh Kumar,
Eric Sandeen

- Lack of e2fsprogs support for some of the features is viewed as the
major roadblock for declaring ext4 stable.  There was talk about
creating a forked version of the tools to only support ext4 while we
wait for other features to be implemented in Ted's e2fsprogs tree.
The goal of this version is to provide users with the means of creating
and checking (fsck) ext4 filesystems in order to increase test coverage.
Such a version would probably not export any libraries and thus not worry
about ABI breakage issues.

- Most filesystem layout changes are already in the patch queue so
the filesystem format is stable.  Other major features are in-memory
changes and thus can be changed/improved/debugged after dev status is
removed.

- Andreas expressed interest in Avantika's automated testing to see if it
was possible to test different permutations of features to test for breakage
of the ext4 code.

- Jose to post FLEX_BG kernel patch for inclusion into the patch
queue.  Inode and block allocation changes are memory only, so they can
be added after removal of dev status.

- Lots of talk about the process of sending patches to MM before sending
patches to Linus during the merge window.  Eric pointed out that if
nobody was testing ext4 in mm, then this whole exercise is mostly a
wasted effort.  Andreas and Shaggy pointed out Andrew's concerns about
patches not in mm during the last merge window.  Shaggy pointed out
that Ted's git tree is not always up to date with the latest ext4 patch
queue and that someone needs to regularly maintain a git tree Andrew can
pull from.

- Even though attendance was low, Andreas suggested that a meeting
notice be sent since a lot of good discussion was had.  Everybody
agreed, but nobody took meeting notes and nobody volunteered. :)

Apologies if I left out something important.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] FLEX_BG Kernel support v2.

2007-09-12 Thread Jose R. Santos
On Wed, 12 Sep 2007 01:27:54 -0600
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Sep 11, 2007  19:07 -0500, Jose R. Santos wrote:
> > @@ -1248,13 +1248,17 @@ static int ext4_check_descriptors (struct 
> > super_block * sb)
> > ext4_fsblk_t inode_table;
> > struct ext4_group_desc * gdp = NULL;
> > int desc_block = 0;
> > +   int flexbg_bg = 0;
> > int i;
> >  
> > +   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
> > +   flexbg_flag = 1;
> 
> Umm, have you compiled this?  "int flexbg_bg" and "flexbg_flag"...

Compiled - Yes
Tested - Yes
Run "stg refresh" before sending the patch - No

-JRS

commit 27e75138890129895a5639f602fec564479583b3
Author: Jose R. Santos <[EMAIL PROTECTED]>
Date:   Wed Sep 12 06:56:58 2007 -0500

FLEX_BG Kernel support v2.

This feature relaxes check restrictions on where each block group's meta-data is
located within the storage media.  This allows for the allocation of bitmaps or
inode tables outside the block group boundaries in cases where bad blocks force
us to look for new blocks which the owning block group cannot satisfy.  This
will also allow for new meta-data allocation schemes to improve performance and
scalability.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4550b83..dbce81d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1248,13 +1248,17 @@ static int ext4_check_descriptors (struct super_block * 
sb)
ext4_fsblk_t inode_table;
struct ext4_group_desc * gdp = NULL;
int desc_block = 0;
+   int flexbg_flag = 0;
int i;
 
+   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+   flexbg_flag = 1;
+
ext4_debug ("Checking group descriptors");
 
for (i = 0; i < sbi->s_groups_count; i++)
{
-   if (i == sbi->s_groups_count - 1)
+   if (i == sbi->s_groups_count - 1 || flexbg_flag)
last_block = ext4_blocks_count(sbi->s_es) - 1;
else
last_block = first_block +
@@ -1291,7 +1295,8 @@ static int ext4_check_descriptors (struct super_block * 
sb)
i, inode_table);
return 0;
}
-   first_block += EXT4_BLOCKS_PER_GROUP(sb);
+   if (!flexbg_flag)
+   first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index cdee7aa..d53e167 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -702,13 +702,15 @@ static inline int ext4_valid_inum(struct super_block *sb, 
unsigned long ino)
 #define EXT4_FEATURE_INCOMPAT_META_BG  0x0010
 #define EXT4_FEATURE_INCOMPAT_EXTENTS  0x0040 /* extents support */
 #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
 
 #define EXT4_FEATURE_COMPAT_SUPP   EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
 EXT4_FEATURE_INCOMPAT_RECOVER| \
 EXT4_FEATURE_INCOMPAT_META_BG| \
 EXT4_FEATURE_INCOMPAT_EXTENTS| \
-EXT4_FEATURE_INCOMPAT_64BIT)
+EXT4_FEATURE_INCOMPAT_64BIT| \
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #define EXT4_FEATURE_RO_COMPAT_SUPP(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] FLEX_BG Kernel support v2.

2007-09-11 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

FLEX_BG Kernel support v2.

This feature relaxes check restrictions on where each block group's meta-data is
located within the storage media.  This allows for the allocation of bitmaps or
inode tables outside the block group boundaries in cases where bad blocks force
us to look for new blocks which the owning block group cannot satisfy.  This
will also allow for new meta-data allocation schemes to improve performance and
scalability.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 fs/ext4/super.c |9 +++--
 include/linux/ext4_fs.h |4 +++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4550b83..39aa76f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1248,13 +1248,17 @@ static int ext4_check_descriptors (struct super_block * 
sb)
ext4_fsblk_t inode_table;
struct ext4_group_desc * gdp = NULL;
int desc_block = 0;
+   int flexbg_bg = 0;
int i;
 
+   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+   flexbg_flag = 1;
+
ext4_debug ("Checking group descriptors");
 
for (i = 0; i < sbi->s_groups_count; i++)
{
-   if (i == sbi->s_groups_count - 1)
+   if (i == sbi->s_groups_count - 1 || flexbg_flag)
last_block = ext4_blocks_count(sbi->s_es) - 1;
else
last_block = first_block +
@@ -1291,7 +1295,8 @@ static int ext4_check_descriptors (struct super_block * 
sb)
i, inode_table);
return 0;
}
-   first_block += EXT4_BLOCKS_PER_GROUP(sb);
+   if (!flexbg_flag)
+   first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index cdee7aa..d53e167 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -702,13 +702,15 @@ static inline int ext4_valid_inum(struct super_block *sb, 
unsigned long ino)
 #define EXT4_FEATURE_INCOMPAT_META_BG  0x0010
 #define EXT4_FEATURE_INCOMPAT_EXTENTS  0x0040 /* extents support */
 #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
 
 #define EXT4_FEATURE_COMPAT_SUPP   EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
 EXT4_FEATURE_INCOMPAT_RECOVER| \
 EXT4_FEATURE_INCOMPAT_META_BG| \
 EXT4_FEATURE_INCOMPAT_EXTENTS| \
-EXT4_FEATURE_INCOMPAT_64BIT)
+EXT4_FEATURE_INCOMPAT_64BIT| \
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #define EXT4_FEATURE_RO_COMPAT_SUPP(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] FLEX_BG Kernel support.

2007-09-11 Thread Jose R. Santos
On Tue, 11 Sep 2007 17:21:39 -0600
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Sep 11, 2007  07:27 -0500, Jose R. Santos wrote:
> > On Tue, 11 Sep 2007 00:04:43 -0600
> > "Andreas Dilger" <[EMAIL PROTECTED]> wrote:
> > > On 9/10/07, Jose R. Santos <[EMAIL PROTECTED]> wrote:
> > > > @@ -1254,7 +1254,8 @@ static int ext4_check_descriptors (struct 
> > > > super_block * sb)
> > > >
> > > > for (i = 0; i < sbi->s_groups_count; i++)
> > > > {
> > > > -   if (i == sbi->s_groups_count - 1)
> > > > +   if (i == sbi->s_groups_count - 1 || 
> > > > EXT4_HAS_INCOMPAT_FEATURE(sb,
> > > > +   EXT4_FEATURE_INCOMPAT_FLEX_BG))
> > > > last_block = ext4_blocks_count(sbi->s_es) - 1;
> > > 
> > > No need to check this featyre for every  group, once at the beginning
> > > of the function is enough.
> > > 
> > 
> > Do you mean something like the original patch?
> > http://lists.openwall.net/linux-ext4/2007/07/12/20
> > 
> > Wouldn't we need to check all the descriptor for corruption if checksum
> > is not enable on the filesystem?
> 
> Yes, I just meant you don't need to have:
> 
>   EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
> 
> for each time through the loop.  That loop is walked 8000 times per TB
> at mount, so if we can make it faster we should do so.

Good point, I'll send an updated patch.


-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] FLEX_BG Kernel support.

2007-09-11 Thread Jose R. Santos
On Tue, 11 Sep 2007 00:04:43 -0600
"Andreas Dilger" <[EMAIL PROTECTED]> wrote:

> On 9/10/07, Jose R. Santos <[EMAIL PROTECTED]> wrote:
> > @@ -1254,7 +1254,8 @@ static int ext4_check_descriptors (struct super_block 
> > * sb)
> >
> > for (i = 0; i < sbi->s_groups_count; i++)
> > {
> > -   if (i == sbi->s_groups_count - 1)
> > +   if (i == sbi->s_groups_count - 1 || 
> > EXT4_HAS_INCOMPAT_FEATURE(sb,
> > +   EXT4_FEATURE_INCOMPAT_FLEX_BG))
> > last_block = ext4_blocks_count(sbi->s_es) - 1;
> 
> No need to check this featyre for every  group, once at the beginning
> of the function is enough.
> 

Do you mean something like the original patch?
http://lists.openwall.net/linux-ext4/2007/07/12/20

Wouldn't we need to check all the descriptors for corruption if checksum
is not enabled on the filesystem?

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] FLEX_BG Kernel support.

2007-09-10 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

FLEX_BG Kernel support.

This feature relaxes check restrictions on where each block group's meta-data is
located within the storage media.  This allows for the allocation of bitmaps or
inode tables outside the block group boundaries in cases where bad blocks force
us to look for new blocks which the owning block group cannot satisfy.  This
will also allow for new meta-data allocation schemes to improve performance and
scalability.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 fs/ext4/super.c |7 +--
 include/linux/ext4_fs.h |4 +++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4550b83..902e5c8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1254,7 +1254,8 @@ static int ext4_check_descriptors (struct super_block * 
sb)
 
for (i = 0; i < sbi->s_groups_count; i++)
{
-   if (i == sbi->s_groups_count - 1)
+   if (i == sbi->s_groups_count - 1 || 
EXT4_HAS_INCOMPAT_FEATURE(sb,
+   EXT4_FEATURE_INCOMPAT_FLEX_BG))
last_block = ext4_blocks_count(sbi->s_es) - 1;
else
last_block = first_block +
@@ -1291,7 +1292,9 @@ static int ext4_check_descriptors (struct super_block * 
sb)
i, inode_table);
return 0;
}
-   first_block += EXT4_BLOCKS_PER_GROUP(sb);
+   if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
+  EXT4_FEATURE_INCOMPAT_FLEX_BG))
+   first_block += EXT4_BLOCKS_PER_GROUP(sb);
gdp = (struct ext4_group_desc *)
((__u8 *)gdp + EXT4_DESC_SIZE(sb));
}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index cdee7aa..d53e167 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -702,13 +702,15 @@ static inline int ext4_valid_inum(struct super_block *sb, 
unsigned long ino)
 #define EXT4_FEATURE_INCOMPAT_META_BG  0x0010
 #define EXT4_FEATURE_INCOMPAT_EXTENTS  0x0040 /* extents support */
 #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
 
 #define EXT4_FEATURE_COMPAT_SUPP   EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
 EXT4_FEATURE_INCOMPAT_RECOVER| \
 EXT4_FEATURE_INCOMPAT_META_BG| \
 EXT4_FEATURE_INCOMPAT_EXTENTS| \
-EXT4_FEATURE_INCOMPAT_64BIT)
+EXT4_FEATURE_INCOMPAT_64BIT| \
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #define EXT4_FEATURE_RO_COMPAT_SUPP(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ZFS, XFS, and EXT4 compared

2007-08-30 Thread Jose R. Santos
On Thu, 30 Aug 2007 11:52:10 -0700
"Jeffrey W. Baker" <[EMAIL PROTECTED]> wrote:

> On Thu, 2007-08-30 at 08:37 -0500, Jose R. Santos wrote:  
> > On Wed, 29 Aug 2007 23:16:51 -0700
> > "Jeffrey W. Baker" <[EMAIL PROTECTED]> wrote:  
> > > http://tastic.brillig.org/~jwb/zfs-xfs-ext4.html  
> > 
> > FFSB:
> > Could you send the patch to fix FFSB Solaris build?  I should probably
> > update the Sourceforge version so that it built out of the box.  
> 
> Sadly I blew away OpenSolaris without preserving the patch, but the gist
> of it is this: ctime_r takes three parameters on Solaris (the third is
> the buffer length) and Solaris has directio(3c) instead of O_DIRECT.  

If you ever run these workloads again, a tested patch would be greatly
appreciated since I do not currently have access to an OpenSolaris box.

> > I'm also curious about your choices in the FFSB profiles you created.
> > Specifically, the very short run time and doing fsync after every file
> > close.  When using FFSB, I usually run with a large run time (usually
> > 600 seconds) to make sure that we do enough IO to get a stable
> > result.  
> 
> With a 1GB machine and max I/O of 200MB/s, I assumed 30 seconds would be
> enough for the machine to quiesce.  You disagree?  The fsync flag is in
> there because my primary workload is PostgreSQL, which is entirely
> synchronous.  

On your results, you mentioned that you are able to get about 150MB/s
out of the RAID controller and here you said you're getting about
200MB/s in FFSB?  Then it does probably mean that you needed to run for
an extended period of time since it could mean that you could be doing
a lot from page cache.  You could verify that you get the same results,
by doing one of the runs with a larger run time and comparing it to one
of the previous runs.

The fsync flag only does fsync at file close time, not at each IO
transaction on a selected file.  For the purposes of testing
PostgreSQL, wouldn't testing with O_DIRECT be more what you are looking
for?

> > Running longer means that we also use more of the disk
> > storage and our results are not base on doing IO to just the beginning
> > of the disk.  When running for that long period of time, the fsync flag
> > is not required since we do enough reads and writes to cause memory
> > pressure and guarantee IO going to disk.  Nothing wrong in what you
> > did, but I wonder how it would affect the results of these runs.  
> 
> So do I :)  I did want to finish the test in a practical amount of time,
> and it takes 4 hours for the RAID to build.  I will do a few hours-long
> runs of ffsb with Ext4 and see what it looks like.  

Been there.  I feel your pain. :)

> > The agefs options you use are also interesting since you only utilize a
> > very small percentage of your filesystem.  Also note that since create
> > and append weight are very heavy compare to deletes, the desired
> > utilization would be reach very quickly and without that much
> > fragmentation.  Again, nothing wrong here, just very interested in your
> > perspective in selecting these setting for your profile.  
> 
> The aging takes forever, as you are no doubt already aware.  It requires
> at least 1 minute for 1% utilization.  On a longer run, I can do more
> aging.  The create and append weights are taken from the README.  

Yes, it does take forever, but since you're doing so very little aging,
why even run it in the first place?  It will make your runs go faster
if you just don't use it. :)

Did such a small amount of aging create a noticeable difference in the
results?  It may have, since I've never run aging with such a small run
time myself.

> > Don't mean to invalidate the Postmark results, just merely pointing out
> > a possible error in the assessment of the meta-data performance of ZFS.
> > I say possible since it's still unknown if another workload will be
> > able to validate these results.  
> 
> I don't want to pile scorn on XFS, but the postmark workload was chosen
> for a reasonable run time on XFS, and then it turned out that it runs in
> 1-2 seconds on the other filesystems.  The scaling factors could have
> been better chosen to exercise the high speeds of Ext4 and ZFS.  The
> test needs to run for more than a minute to get meaningful results from
> postmark, since it uses truncated whole number seconds as the
> denominator when reporting.
> 
> One thing that stood out from the postmark results is how ext4/sw has a
> weird inverse scaling with respect to the number of subdirectories.
> It's faster with 1 files in 1 directory than with 100 files each in
> 100 subdirectories.  Odd, no?

Not so weird sinc

Re: ZFS, XFS, and EXT4 compared

2007-08-30 Thread Jose R. Santos
On Wed, 29 Aug 2007 23:16:51 -0700
"Jeffrey W. Baker" <[EMAIL PROTECTED]> wrote:

Nice comparisons.

> I have a lot of people whispering "zfs" in my virtual ear these days,
> and at the same time I have an irrational attachment to xfs based
> entirely on its lack of the 32000 subdirectory limit.  I'm not afraid of

The 32000 subdir limit should be fixed on the latest rc kernels.

> ext4's newness, since really a lot of that stuff has been in Lustre for
> years.  So a-benchmarking I went.  Results at the bottom:
> 
> http://tastic.brillig.org/~jwb/zfs-xfs-ext4.html

FFSB:
Could you send the patch to fix FFSB Solaris build?  I should probably
update the Sourceforge version so that it built out of the box.

I'm also curious about your choices in the FFSB profiles you created.
Specifically, the very short run time and doing fsync after every file
close.  When using FFSB, I usually run with a large run time (usually
600 seconds) to make sure that we do enough IO to get a stable
result.  Running longer means that we also use more of the disk
storage and our results are not based on doing IO to just the beginning
of the disk.  When running for that long a period of time, the fsync flag
is not required since we do enough reads and writes to cause memory
pressure and guarantee IO going to disk.  Nothing wrong in what you
did, but I wonder how it would affect the results of these runs.

The agefs options you use are also interesting since you only utilize a
very small percentage of your filesystem.  Also note that since the create
and append weights are very heavy compared to deletes, the desired
utilization would be reached very quickly and without that much
fragmentation.  Again, nothing wrong here, just very interested in your
perspective in selecting these settings for your profile.

Postmark:

I've been looking at the postmark results and I'm becoming more convinced
that the meta-data results in ZFS may be artificially high due to the
nature of the workload.  For one thing, I find it very interesting
(i.e. odd) that 9050KB/s reads and 28360KB/s writes show up multiple
times even across filesystems.  The data set on postmark is also very
limited in size and the run times are small enough that it is difficult
to get an idea of sustained meta-data performance on any of the
filesystems.  Based on the ZFS numbers, it seems that there is hardly
any IO being done in the ZFS case given the random nature of the
workload and the high numbers it's achieving.

In short, I don't think postmark is a very good workload to sustain ZFS's
claim as the meta-data king.  It may very well be the case, but I would
like to see that proven with another workload.  One that actually shows
sustained meta-data performance across a fairly large fileset would be
preferred.  FFSB could be used to simulate a meta-data intensive workload
as well and it has better control over the fileset size and run time to
make the results more interesting.

Don't mean to invalidate the Postmark results, just merely pointing out
a possible error in the assessment of the meta-data performance of ZFS.
I say possible since it's still unknown if another workload will be
able to validate these results.

General:
Did you gather CPU statistics when running these benchmarks?  For
some environments, having the ratio of filesystem performance to CPU
utilization would be good information to have since some workloads are
CPU sensitive and being 20% faster while consuming 50% more CPU may not
necessarily be a good thing.  While this may be less of an issue in the
future since CPU performance seems to be increasing at a much faster
pace than IO and disk performance, it would still be another
interesting data point.

> 
> Short version: ext4 is awesome.  zfs has absurdly fast metadata
> operations but falls apart on sequential transfer.  xfs has great
> sequential transfer but really bad metadata ops, like 3 minutes to tar
> up the kernel.
> 
> It would be nice if mke2fs would copy xfs's code for optimal layout on a
> software raid.  The mkfs defaults and the mdadm defaults interact badly.
>
> Postmark is somewhat bogus benchmark with some obvious quantization
> problems.

Ah...  Guess you agree with me about the postmark results validity. ;)

> Regards,
> jwb
> 


-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Adding META_BG/FLEX_BG awareness to inode allocator.

2007-08-21 Thread Jose R. Santos
Hi Andreas, Ted,

I've been looking at the uninitialized block group patches to see how
they overlap with the uninitialized inode tables that I was looking
into for FLEX_BG and I think I can achieve the same thing using those
patches.  One of the things I wanted to achieve with uninitialized
inode tables was to pack inodes for the same META/FLEX_BG and allocate
new inode tables as we ran out of pre-initialized inodes.  This is
something that applies to FLEX_BG since the layout of the meta-data has
changed.

It seems that the only thing that would prevent the uninitialized block
group patches from doing this is ialloc.c's unawareness of
META_BG/FLEX_BG.  It seems like this is a good excuse to make the inode and
block allocation algorithms aware of the new meta-data layout.

I could add META_BG/FLEX_BG awareness and see how the
allocation/initialization of inode changes and also see what sort of
performance impacts we see by these changes. 

Thoughts?

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4][e2fsprogs] Relax group descriptor checking.

2007-08-13 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Relax group descriptor checking.

In order for tools such as dumpe2fs, e2fsck and debugfs to open an ext4
filesystem with the FLEX_BG feature enabled, some descriptor checking needs
to be relaxed.  This patch changes the group descriptor checking so
that bitmaps and inode tables can be located anywhere in the
partition's block range.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 e2fsck/super.c  |   14 --
 lib/ext2fs/check_desc.c |   15 +--
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/e2fsck/super.c b/e2fsck/super.c
index 00a131c..ed28732 100644
--- a/e2fsck/super.c
+++ b/e2fsck/super.c
@@ -463,6 +463,8 @@ void check_super_block(e2fsck_t ctx)
int inodes_per_block;
int ipg_max;
int inode_size;
+   dgrp_t  start_group;
+   int meta_bg_size;
dgrp_t  i;
blk_t   should_be;
struct problem_context  pctx;
@@ -578,8 +580,16 @@ void check_super_block(e2fsck_t ctx)
for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
pctx.group = i;
 
-   first_block = ext2fs_group_first_block(fs, i);
-   last_block = ext2fs_group_last_block(fs, i);
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   meta_bg_size = (fs->blocksize / sizeof (struct 
ext2_group_desc));
+   start_group = (i / meta_bg_size) * meta_bg_size;
+   first_block = ext2fs_group_first_block(fs, start_group);
+   last_block = ext2fs_group_first_block(fs, start_group + 
meta_bg_size);
+   } else {
+   first_block = ext2fs_group_first_block(fs, i);
+   last_block = ext2fs_group_last_block(fs, i);
+   }
 
if ((gd->bg_block_bitmap < first_block) ||
(gd->bg_block_bitmap > last_block)) {
diff --git a/lib/ext2fs/check_desc.c b/lib/ext2fs/check_desc.c
index 146f9e5..dbbcfb3 100644
--- a/lib/ext2fs/check_desc.c
+++ b/lib/ext2fs/check_desc.c
@@ -34,12 +34,23 @@ errcode_t ext2fs_check_desc(ext2_filsys fs)
dgrp_t i;
blk_t first_block = fs->super->s_first_data_block;
blk_t last_block;
+   dgrp_t start_group;
+   int meta_bg_size;
 
EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
 
for (i = 0; i < fs->group_desc_count; i++) {
-   first_block = ext2fs_group_first_block(fs, i);
-   last_block = ext2fs_group_last_block(fs, i);
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super, 
+  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   meta_bg_size = (fs->blocksize / sizeof (struct 
ext2_group_desc));
+   start_group = (i / meta_bg_size) * meta_bg_size;
+   first_block = ext2fs_group_first_block(fs, start_group);
+   last_block = ext2fs_group_first_block(fs, start_group + 
meta_bg_size);
+   }
+   else {
+   first_block = ext2fs_group_first_block(fs, i);
+   last_block = ext2fs_group_last_block(fs, i);
+   }
 
/*
 * Check to make sure block bitmap for group is
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4][e2fsprogs] New bitmap and inode table allocation for FLEX_BG

2007-08-13 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

New bitmap and inode table allocation for FLEX_BG

Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  The block and inode bitmaps are
allocated as one contiguous set for each flex block group.  Due to
the size of the inode tables, the inode table for each block group is
allocated individually but packed close together at the beginning of a
flex group.  For now, this allows the inode table to be packed
close to the inode bitmaps in cases where we try to allocate a large
group of inode tables right after the bitmaps and fail.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/alloc_tables.c |  132 -
 1 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..9740a2f 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,6 +27,124 @@
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
+#define ALLOC_BLOCK_BITMAPS1
+#define ALLOC_INODE_BITMAPS2
+#define ALLOC_INODE_TABLES 3
+
+errcode_t ext2fs_allocate_contiguous(ext2_filsys fs, dgrp_t group,
+int type, blk_t start_blk, blk_t last_blk, 
+int count, ext2fs_block_bitmap bmap)
+{
+   errcode_t   retval;
+   blk_t   new_blk, blk;
+   int i, j;
+
+   if (!bmap)
+   bmap = fs->block_map;
+
+   switch (type) {
+   case ALLOC_BLOCK_BITMAPS:
+   retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+   1 * count, bmap, &new_blk);
+   if (retval)
+   return retval;
+   for (i=0, blk=new_blk; i < count; i++, blk++) {
+   ext2fs_mark_block_bitmap(bmap, blk);
+   fs->group_desc[group+i].bg_block_bitmap = blk;
+   }
+   break;
+
+   case ALLOC_INODE_BITMAPS:
+   retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+   1 * count, bmap, &new_blk);
+   if (retval)
+   return retval;
+   for (i=0, blk=new_blk; i < count; i++, blk++) {
+   ext2fs_mark_block_bitmap(bmap, blk);
+   fs->group_desc[group+i].bg_inode_bitmap = blk;
+   }
+   break;
+
+   case ALLOC_INODE_TABLES:
+   for (i=0; i < count; i++) {
+   retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+   
fs->inode_blocks_per_group,
+   bmap, &new_blk);
+   if (retval)
+   return retval;
+   blk = new_blk;
+   for (j=0; j < fs->inode_blocks_per_group; j++, blk++)
+   ext2fs_mark_block_bitmap(bmap, blk);
+   fs->group_desc[group+i].bg_inode_table = new_blk;
+   }
+   break;
+
+   }
+   return 0;
+}
+
+
+
+errcode_t ext2fs_allocate_flex_groups(ext2_filsys fs)
+{
+   errcode_t   retval;
+   blk_t   start, last, j, blocks;
+   dgrp_t  i, k;
+   int meta_bg_size;
+
+   meta_bg_size = (fs->blocksize / sizeof (struct ext2_group_desc));
+   blocks = 0;
+
+   for (i = 0; i < fs->group_desc_count; i=i+meta_bg_size) {
+   
+   start = ext2fs_group_first_block(fs, i);
+
+   if (i+meta_bg_size >= fs->group_desc_count) {
+   last = ext2fs_group_last_block(fs, 
fs->group_desc_count);
+   meta_bg_size = fs->group_desc_count - i;
+   }
+   else
+   last = ext2fs_group_last_block(fs, i+meta_bg_size-1);
+
+   retval = ext2fs_allocate_contiguous(fs, i, ALLOC_BLOCK_BITMAPS,
+   start, last, meta_bg_size,
+   fs->block_map);
+   if (retval)
+   return retval;
+   retval = ext2fs_allocate_contiguous(fs, i, ALLOC_INODE_BITMAPS,
+   start, last, meta_bg_size,
+   fs->block_map);
+   if (retval)
+   return retval;
+   retval = ext2fs_allocate_contiguous(fs, i, ALLOC_INODE_TABLES,
+   start, last, meta_bg_size,
+   fs->block_map);
+   i

[PATCH 2/4][e2fsprogs] Allow FLEX_BG to be use as a feature option at mke2fs time.

2007-08-13 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Allow FLEX_BG to be used as a feature option at mke2fs time.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/e2p/feature.c   |2 ++
 lib/ext2fs/ext2fs.h |6 --
 misc/mke2fs.c   |7 ++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index fe7e65a..4bf5630 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -67,6 +67,8 @@ static struct feature feature_list[] = {
"extent" },
{   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_64BIT,
"64bit" },
+   {   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_FLEX_BG,
+"flex_bg"},
{   0, 0, 0 },
 };
 
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 83a9091..5c461c9 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -432,12 +432,14 @@ typedef struct ext2_icount *ext2_icount_t;
 EXT2_FEATURE_INCOMPAT_COMPRESSION|\
 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\
 EXT2_FEATURE_INCOMPAT_META_BG|\
-EXT3_FEATURE_INCOMPAT_RECOVER)
+EXT3_FEATURE_INCOMPAT_RECOVER|\
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #else
 #define EXT2_LIB_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE|\
 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\
 EXT2_FEATURE_INCOMPAT_META_BG|\
-EXT3_FEATURE_INCOMPAT_RECOVER)
+EXT3_FEATURE_INCOMPAT_RECOVER|\
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #endif
 #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP
(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
 EXT2_FEATURE_RO_COMPAT_LARGE_FILE)
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 4a6cace..6dd8d30 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -873,7 +873,8 @@ static __u32 ok_features[3] = {
EXT2_FEATURE_COMPAT_LAZY_BG,/* Compat */
EXT2_FEATURE_INCOMPAT_FILETYPE| /* Incompat */
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|
-   EXT2_FEATURE_INCOMPAT_META_BG,
+   EXT2_FEATURE_INCOMPAT_META_BG|
+   EXT4_FEATURE_INCOMPAT_FLEX_BG,
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER /* R/O compat */
 };
 
@@ -1363,6 +1364,10 @@ static void PRS(int argc, char *argv[])
fs_param.s_feature_ro_compat = 0;
}

+   if (fs_param.s_feature_incompat &
+   EXT4_FEATURE_INCOMPAT_FLEX_BG)
+   fs_param.s_feature_incompat |= EXT2_FEATURE_INCOMPAT_META_BG;
+
/* Set first meta blockgroup via an environment variable */
/* (this is mostly for debugging purposes) */
if ((fs_param.s_feature_incompat & EXT2_FEATURE_INCOMPAT_META_BG) &&
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4][e2fsprogs] Enable FLEX_BG support

2007-08-13 Thread Jose R. Santos

The following series of patches add support for creating and checking
filesystems with the FLEX_BG feature.  This feature currently groups
meta-data from a series of groups at the beginning of a flex group in
order to improve performance during heavy meta-data operations.

Changes from last time:
- When making a filesystem with FLEX_BG also enable META_BG feature.
- Allocate meta data within the META_BG group range.  
- Descriptor checking ensures bitmaps and inode tables are in the META 
  group.

Problems and TODOs:
- Fsck has some failures using FLEX_BG and resize_inode features at the
  same time.  Still investigating.
- Need to define what unallocated inode tables will look like.
- Need to create test case

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4][e2fsprogs] Reserve the INCOMPAT feature number for FLEX_BG.

2007-08-13 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Reserve the INCOMPAT feature number for FLEX_BG.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/ext2_fs.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index a316665..2394857 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -640,6 +640,7 @@ struct ext2_super_block {
 #define EXT3_FEATURE_INCOMPAT_EXTENTS  0x0040
 #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
 #define EXT4_FEATURE_INCOMPAT_MMP  0x0100
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
 
 
 #define EXT2_FEATURE_COMPAT_SUPP   0
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: seekwatcher IO visualization

2007-08-09 Thread Jose R. Santos
On Thu, 9 Aug 2007 14:53:55 -0600
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> I saw on #linuxfs today some graphs and movies by Chris Mason showing
> the poor IO locality for ext3 compared to btrfs and XFS.  It would be
> interesting to run this with ext4+mballoc+delalloc, and a separate
> one with the FLEXBG feature enabled to see how that improves the
> seeking/locality.
> 
> http://oss.oracle.com/~mason/seekwatcher/
> 
> Cheers, Andreas
> --
> Andreas Dilger
> Principal Software Engineer
> Cluster File Systems, Inc.
> 

I was planning to use seekwatcher to look at fsck activity when using
FLEX_BG since I'm already seeing noticeable improvements by using this
feature alone.

We can try other workloads and see how ext4 compares.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH][e2fsprogs] Move ext2fs_struct_generic_bitmap back into ext2fs.h

2007-08-07 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Move ext2fs_struct_generic_bitmap back into ext2fs.h

In Commit: f1f115a78f5ea599fc5f8815a741d43fedd5840d

The ext2fs_struct_generic_bitmap structure was removed from ext2fs.h and
put into gen_bitmap.c.  This breaks big endian compiles since swapfs.c
uses this structure as well if EXT2_BIG_ENDIAN_BITMAPS is defined.

Since we have multiple users, this patch moves
ext2fs_struct_generic_bitmap back into ext2fs.h in order to compile on
PowerPC or other big endian archs.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/ext2fs.h |   11 +++
 lib/ext2fs/gen_bitmap.c |   11 ---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index d1cda2f..f34d2f9 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -100,6 +100,17 @@ typedef __u32  ext2_dirhash_t;
 
 typedef struct struct_ext2_filsys *ext2_filsys;
 
+struct ext2fs_struct_generic_bitmap {
+   errcode_t   magic;
+   ext2_filsys fs;
+   __u32   start, end;
+   __u32   real_end;
+   char*   description;
+   char*   bitmap;
+   errcode_t   base_error_code;
+   __u32   reserved[7];
+};
+
 #define EXT2FS_MARK_ERROR  0
 #define EXT2FS_UNMARK_ERROR1
 #define EXT2FS_TEST_ERROR  2
diff --git a/lib/ext2fs/gen_bitmap.c b/lib/ext2fs/gen_bitmap.c
index 66172e5..3d01149 100644
--- a/lib/ext2fs/gen_bitmap.c
+++ b/lib/ext2fs/gen_bitmap.c
@@ -27,17 +27,6 @@
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
-struct ext2fs_struct_generic_bitmap {
-   errcode_t   magic;
-   ext2_filsys fs;
-   __u32   start, end;
-   __u32   real_end;
-   char*   description;
-   char*   bitmap;
-   errcode_t   base_error_code;
-   __u32   reserved[7];
-};
-
 /* 
  * Used by previously inlined function, so we have to export this and
  * not change the function signature
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: E2fsprogs git tree

2007-08-07 Thread Jose R. Santos
On Tue, 7 Aug 2007 13:28:14 -0400
Theodore Tso <[EMAIL PROTECTED]> wrote:

> On Tue, Aug 07, 2007 at 06:23:46PM +0200, Valerie Clement wrote:
> > Hi Ted,
> > 
> > I tried to clone the source tree but it failed.
> > 
> > The command "git-clone 
> > http://www.kernel.org/pub/scm/fs/ext2/e2fsprogs.git"; failed with : 
> > "error: Could not interpret tags/APPLE_UUID_SNAP_1 as something to
> > pull"
> > 
> > The command "git-clone http://repo.or.cz/r/e2fsprogs.git"; failed
> > with: "error: Can't lock ref"
> 
> What version of git are you using?  Make sure you are using something
> which is at least git 1.5.x.

I get the same error as Valerie when using version 1.4.4.2.  The same
version worked a couple of weeks ago when using the http URL.

> 
> BTW, the better URL's to use are:
> 
> git clone git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git
> 
> or
> 
> git clone git://repo.or.cz/e2fsprogs.git

This seems to work fine using my older version of git.
 
> The http walkers are *much* more inefficient.  I did try out git-clone
> using the http URL's, and it works, but it's slow.  The other
> possibility is that you have some kind of nasty http transparent proxy
> which is corrupting the http protocol stream.  This is why the git
> transport is seriously the much, much, MUCH better alternative.   
> 
> 
>   - Ted

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Ext4 devel interlock meeting minutes (July 30, 2007)

2007-08-03 Thread Jose R. Santos
On Fri, 03 Aug 2007 15:56:37 -0700
Avantika Mathur <[EMAIL PROTECTED]> wrote:
> Flexible Block Groups:
> - Jose is trying to see why fsck is not working.  once this is resolved, 
> the feature will be complete. 

Not quite true.  Solving the fsck issue means that we can do more
thorough testing of the performance impacts of packaging meta-data
closer together.  The patches do not currently remove the 512TB limit
imposed by the number of block groups, which is something that I also
plan to address on this feature.

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4][e2fsprogs] Relax group descriptor checking.

2007-08-03 Thread Jose R. Santos
On Fri, 3 Aug 2007 12:22:17 -0600
Andreas Dilger <[EMAIL PROTECTED]> wrote:
> On Aug 02, 2007  23:00 -0500, Jose R. Santos wrote:
> > Eventually, a more thorough check would restrict bitmaps and inode
> > tables to be located at the beginning of a flex block group range.
> > Since the super block does not currently know about the number of
> > groups per flex group, this will do for now.
> 
> As with regular block groups, it would probably be better to limit the
> bitmaps and inode tables to anywhere inside the flexbg instead of the
> start.  That would allow, for example, INCOMPAT_FLEXBG to be enabled
> on an existing filesystem and the metadata could be moved together as
> space becomes available.

This is something that would be simple to do if the ratio of block
groups per flex group were known to the filesystem itself.  This implies
adding another field to the super block as a reliable way to obtain this
information.  The only thing keeping me from doing so is the uncertainty
of backwards compatibility when changing the super block structure.

I agree though that one of the requirements for this feature is more
robust checking of the location of the bitmaps and inode tables within
the flex group.  Checking of the descriptor in flexbg becomes a little
more complicated than in regular block groups because:

1. The block and inode bitmaps should be allocated as one big chunk for
each flex group.

2. The block and inode bitmaps should be located in the first block
group and the inode tables within the first few groups of a flex group.

3. If the full range of bitmaps is not allocated contiguously, this
means that bad blocks caused us to move a particular bitmap out and
thus the bad block list should be checked to ensure that this was the
case.

If the above conditions are not met, this could point to possible
corruption in the block descriptors.

> Cheers, Andreas
> --
> Andreas Dilger
> Principal Software Engineer
> Cluster File Systems, Inc.
> 

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4][e2fsprogs] New bitmap and inode table allocation for FLEX_BG

2007-08-03 Thread Jose R. Santos
On Fri, 03 Aug 2007 12:01:20 +0530
"Aneesh Kumar K.V" <[EMAIL PROTECTED]> wrote:

> 
> 
> Jose R. Santos wrote:
> > From: Jose R. Santos <[EMAIL PROTECTED]>
> > 
> > Ne
> > +   case ALLOC_INODE_TABLES:
> > +   for (i=0, blk=new_blk; i < count; i++, blk++) {
> 
> I guess you can drop the blk update in the for() loop above. 

This was from the remains of the very first attempt at inode table
allocation.  Thanks for catching.

> 
> > +   retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
> > +   
> > fs->inode_blocks_per_group,
> > +   bmap, &new_blk);
> > +   if (retval)
> > +   return retval;
> > +   for (j=0, blk = new_blk;
> > +j < fs->inode_blocks_per_group; j++, blk++)
> > +   ext2fs_mark_block_bitmap(bmap, blk);
> > +   fs->group_desc[group+i].bg_inode_table = new_blk;
> > +   }
> > +   break;
> > +
> > +   }
> > +   return 0;
> > +}
> > +
> > +
> > +
> > +errcode_t ext2fs_allocate_flex_groups(ext2_filsys fs)
> > +{
> > +   errcode_t   retval;
> > +   blk_t   start, last, j, blocks;
> > +   dgrp_t  i, k;
> > +   int gpm;
> > +
> > +   gpm = GROUPS_PER_FLEXBG;
> > +   blocks = 0;
> > +
> > +   for (i = 0; i < fs->group_desc_count; i=i+gpm) {
> > +   if (i == 0 )
> > +   start = ext2fs_group_first_block(fs,
> > +FIRST_METADATA_GROUP);
> > +   else
> > +   start = ext2fs_group_first_block(fs, i);
> > +
> > +   if (i+gpm-1 > fs->group_desc_count) {
>   
> 
>   if (i+gpm >= fs->group_desc_count) 

Update.

> 
> > +   last = ext2fs_group_last_block(fs, 
> > fs->group_desc_count);
> > +   gpm = fs->group_desc_count - i;
> > +   }
> 
> 
> -aneesh

Thanks

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4][e2fsprogs] Relax group descriptor checking.

2007-08-03 Thread Jose R. Santos
On Fri, 03 Aug 2007 10:54:04 +0530
"Aneesh Kumar K.V" <[EMAIL PROTECTED]> wrote:
> Jose R. Santos wrote:
> > From: Jose R. Santos <[EMAIL PROTECTED]>
> > 
> >
> >  e2fsck/super.c  |   10 --
> >  lib/ext2fs/check_desc.c |   10 --
> >  2 files changed, 16 insertions(+), 4 deletions(-)
> > 
> > diff --git a/e2fsck/super.c b/e2fsck/super.c
> > index 00a131c..8e58e5c 100644
> > --- a/e2fsck/super.c
> > +++ b/e2fsck/super.c
> > @@ -578,8 +578,14 @@ void check_super_block(e2fsck_t ctx)
> > for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
> > pctx.group = i;
> > 
> > -   first_block = ext2fs_group_first_block(fs, i);
> > -   last_block = ext2fs_group_last_block(fs, i);
> > +   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
> > +   EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
> > +   first_block = fs->super->s_first_data_block;
> > +   last_block = fs->super->s_blocks_count;
> 
> 
> I guess this should be fs->super->s_blocks_count - 1 ;

Updated.

> > +   } else {
> > +   first_block = ext2fs_group_first_block(fs, i);
> > +   last_block = ext2fs_group_last_block(fs, i);
> > +   }
> > 
> > if ((gd->bg_block_bitmap < first_block) ||
> > (gd->bg_block_bitmap > last_block)) {
> > diff --git a/lib/ext2fs/check_desc.c b/lib/ext2fs/check_desc.c
> > index 146f9e5..bb65c06 100644
> > --- a/lib/ext2fs/check_desc.c
> > +++ b/lib/ext2fs/check_desc.c
> > @@ -38,8 +38,14 @@ errcode_t ext2fs_check_desc(ext2_filsys fs)
> > EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
> > 
> > for (i = 0; i < fs->group_desc_count; i++) {
> > -   first_block = ext2fs_group_first_block(fs, i);
> > -   last_block = ext2fs_group_last_block(fs, i);
> > +   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super, 
> > EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
> > +   first_block = fs->super->s_first_data_block;
> > +   last_block = fs->super->s_blocks_count;
> > +   
> 
> I guess this should be fs->super->s_blocks_count - 1 ;

Updated.

Thanks
 
> -aneesh

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4][e2fsprogs] New bitmap and inode table allocation for FLEX_BG

2007-08-02 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

New bitmap and inode table allocation for FLEX_BG

Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  The block and inode bitmaps are
allocated as one contiguous set for each flex block group.  Due to
the size of the inode tables, the inode table for each block group is
allocated individually but packed close together at the beginning of a
flex group.  For now, this allows the inode table to be packed
close to the inode bitmaps in cases where we try to allocate a large
group of inode tables right after the bitmaps and fail.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/alloc_tables.c |  138 -
 1 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..75252fa 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,6 +27,130 @@
 #include "ext2_fs.h"
 #include "ext2fs.h"
 
+#define ALLOC_BLOCK_BITMAPS1
+#define ALLOC_INODE_BITMAPS2
+#define ALLOC_INODE_TABLES 3
+
+#define GROUPS_PER_FLEXBG  64
+#define FIRST_METADATA_GROUP   3
+
+errcode_t ext2fs_allocate_contiguous(ext2_filsys fs, dgrp_t group,
+int type, blk_t start_blk, blk_t last_blk, 
+int count, ext2fs_block_bitmap bmap)
+{
+   errcode_t   retval;
+   blk_t   new_blk, blk;
+   int i, j;
+
+   if (!bmap)
+   bmap = fs->block_map;
+
+   switch (type) {
+   case ALLOC_BLOCK_BITMAPS:
+   retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+   1 * count, bmap, &new_blk);
+   if (retval)
+   return retval;
+   for (i=0, blk=new_blk; i < count; i++, blk++) {
+   ext2fs_mark_block_bitmap(bmap, blk);
+   fs->group_desc[group+i].bg_block_bitmap = blk;
+   }
+   break;
+
+   case ALLOC_INODE_BITMAPS:
+   retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+   1 * count, bmap, &new_blk);
+   if (retval)
+   return retval;
+   for (i=0, blk=new_blk; i < count; i++, blk++) {
+   ext2fs_mark_block_bitmap(bmap, blk);
+   fs->group_desc[group+i].bg_inode_bitmap = blk;
+   }
+   break;
+
+   case ALLOC_INODE_TABLES:
+   for (i=0, blk=new_blk; i < count; i++, blk++) {
+   retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
+   
fs->inode_blocks_per_group,
+   bmap, &new_blk);
+   if (retval)
+   return retval;
+   for (j=0, blk = new_blk;
+j < fs->inode_blocks_per_group; j++, blk++)
+   ext2fs_mark_block_bitmap(bmap, blk);
+   fs->group_desc[group+i].bg_inode_table = new_blk;
+   }
+   break;
+
+   }
+   return 0;
+}
+
+
+
+errcode_t ext2fs_allocate_flex_groups(ext2_filsys fs)
+{
+   errcode_t   retval;
+   blk_t   start, last, j, blocks;
+   dgrp_t  i, k;
+   int gpm;
+
+   gpm = GROUPS_PER_FLEXBG;
+   blocks = 0;
+
+   for (i = 0; i < fs->group_desc_count; i=i+gpm) {
+   if (i == 0 )
+   start = ext2fs_group_first_block(fs,
+FIRST_METADATA_GROUP);
+   else
+   start = ext2fs_group_first_block(fs, i);
+
+   if (i+gpm-1 > fs->group_desc_count) {
+   last = ext2fs_group_last_block(fs, 
fs->group_desc_count);
+   gpm = fs->group_desc_count - i;
+   }
+   else
+   last = ext2fs_group_last_block(fs, i+gpm-1);
+
+   retval = ext2fs_allocate_contiguous(fs, i, ALLOC_BLOCK_BITMAPS,
+   start, last, gpm,
+   fs->block_map);
+   if (retval)
+   return retval;
+   retval = ext2fs_allocate_contiguous(fs, i, ALLOC_INODE_BITMAPS,
+   start, last, gpm,
+   fs->block_map);
+   if (retval)
+   return retval;
+   retval = ext2fs_allocate_contiguo

[PATCH 3/4][e2fsprogs] Relax group descriptor checking.

2007-08-02 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Relax group descriptor checking.

In order for tools such as dumpe2fs, e2fsck and debugfs to open an ext4
filesystem with the FLEX_BG feature enabled, some descriptor checking needs
to be relaxed.  This patch changes the group descriptor checking so
that bitmaps and inode tables can be located anywhere in the
partition's block range.

Eventually, a more thorough check would restrict bitmaps and inode
tables to be located at the beginning of a flex block group range.
Since the super block does not currently know about the number of
groups per flex group, this will do for now.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 e2fsck/super.c  |   10 --
 lib/ext2fs/check_desc.c |   10 --
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/e2fsck/super.c b/e2fsck/super.c
index 00a131c..8e58e5c 100644
--- a/e2fsck/super.c
+++ b/e2fsck/super.c
@@ -578,8 +578,14 @@ void check_super_block(e2fsck_t ctx)
for (i = 0, gd=fs->group_desc; i < fs->group_desc_count; i++, gd++) {
pctx.group = i;
 
-   first_block = ext2fs_group_first_block(fs, i);
-   last_block = ext2fs_group_last_block(fs, i);
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super,
+   EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   first_block = fs->super->s_first_data_block;
+   last_block = fs->super->s_blocks_count;
+   } else {
+   first_block = ext2fs_group_first_block(fs, i);
+   last_block = ext2fs_group_last_block(fs, i);
+   }
 
if ((gd->bg_block_bitmap < first_block) ||
(gd->bg_block_bitmap > last_block)) {
diff --git a/lib/ext2fs/check_desc.c b/lib/ext2fs/check_desc.c
index 146f9e5..bb65c06 100644
--- a/lib/ext2fs/check_desc.c
+++ b/lib/ext2fs/check_desc.c
@@ -38,8 +38,14 @@ errcode_t ext2fs_check_desc(ext2_filsys fs)
EXT2_CHECK_MAGIC(fs, EXT2_ET_MAGIC_EXT2FS_FILSYS);
 
for (i = 0; i < fs->group_desc_count; i++) {
-   first_block = ext2fs_group_first_block(fs, i);
-   last_block = ext2fs_group_last_block(fs, i);
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs->super, 
EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   first_block = fs->super->s_first_data_block;
+   last_block = fs->super->s_blocks_count;
+   }
+   else {
+   first_block = ext2fs_group_first_block(fs, i);
+   last_block = ext2fs_group_last_block(fs, i);
+   }
 
/*
 * Check to make sure block bitmap for group is
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4][e2fsprogs] Enable FLEX_BG support

2007-08-02 Thread Jose R. Santos

The following series of patches add support for creating and checking
filesystems with the FLEX_BG feature.  This feature currently groups
meta-data from a series of groups at the beginning of a flex group in
order to improve performance during heavy meta-data operations.

Some light testing on meta-data filesystem and fsck times already show
some improvements.

This is still very experimental code, meant mostly as a prototype, but
comments are welcome.


-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4][e2fsprogs] Allow FLEX_BG to be use as a feature option at mke2fs time.

2007-08-02 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Allow FLEX_BG to be used as a feature option at mke2fs time.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/e2p/feature.c   |2 ++
 lib/ext2fs/ext2fs.h |6 --
 misc/mke2fs.c   |3 ++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index fe7e65a..4bf5630 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -67,6 +67,8 @@ static struct feature feature_list[] = {
"extent" },
{   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_64BIT,
"64bit" },
+   {   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_FLEX_BG,
+"flex_bg"},
{   0, 0, 0 },
 };
 
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 20c63c0..d1cda2f 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -441,12 +441,14 @@ typedef struct ext2_icount *ext2_icount_t;
 EXT2_FEATURE_INCOMPAT_COMPRESSION|\
 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\
 EXT2_FEATURE_INCOMPAT_META_BG|\
-EXT3_FEATURE_INCOMPAT_RECOVER)
+EXT3_FEATURE_INCOMPAT_RECOVER|\
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #else
 #define EXT2_LIB_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE|\
 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|\
 EXT2_FEATURE_INCOMPAT_META_BG|\
-EXT3_FEATURE_INCOMPAT_RECOVER)
+EXT3_FEATURE_INCOMPAT_RECOVER|\
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #endif
 #define EXT2_LIB_FEATURE_RO_COMPAT_SUPP
(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER|\
 EXT2_FEATURE_RO_COMPAT_LARGE_FILE)
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 0c6d4f3..0af92e2 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -873,7 +873,8 @@ static __u32 ok_features[3] = {
EXT2_FEATURE_COMPAT_LAZY_BG,/* Compat */
EXT2_FEATURE_INCOMPAT_FILETYPE| /* Incompat */
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|
-   EXT2_FEATURE_INCOMPAT_META_BG,
+   EXT2_FEATURE_INCOMPAT_META_BG|
+   EXT4_FEATURE_INCOMPAT_FLEX_BG,
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER /* R/O compat */
 };
 
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4][e2fsprogs] Reserve the INCOMPAT feature number for FLEX_BG.

2007-08-02 Thread Jose R. Santos
From: Jose R. Santos <[EMAIL PROTECTED]>

Reserve the INCOMPAT feature number for FLEX_BG.

Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
--

 lib/ext2fs/ext2_fs.h |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index a316665..2394857 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -640,6 +640,7 @@ struct ext2_super_block {
 #define EXT3_FEATURE_INCOMPAT_EXTENTS  0x0040
 #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
 #define EXT4_FEATURE_INCOMPAT_MMP  0x0100
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
 
 
 #define EXT2_FEATURE_COMPAT_SUPP   0
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: New e2fsprog doc on the ext4 wiki page.

2007-07-24 Thread Jose R. Santos
On Tue, 24 Jul 2007 21:48:58 +0530
"Aneesh Kumar K.V" <[EMAIL PROTECTED]> wrote:

> 
> 
> Jose R. Santos wrote:
> > Hi folks
> > 
> >
> > 
> > ext4migrate:
> > - Patches submitted to the mailing list by Aneesh Kumar
> > http://thread.gmane.org/gmane.comp.file-systems.ext4/1395
> > 
> 
> 
> I guess what we are looking at here is 
> 
> a) ext3 -> ext4 conversion would be done via option for defrag. This
> conversion is now done via an ioctl. This is the suggested method for
> defragmenting an ext3 inode (convert to ext4 and then defrag). 
> 
> b) small inode to large inode conversion is done via tune2fs -I <
> new_inode_size> I have patches for this ready. Will be sending them
> in a day or two.
> 
> So the ext4migrate command may not be really needed.
> 
> 
> -aneesh  

I will remove this item then and add the tune2fs changes.

Is the defrag code going to be part of e2fsprogs or is this going to be
a standalone app?

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


New e2fsprog doc on the ext4 wiki page.

2007-07-24 Thread Jose R. Santos
Hi folks

As discussed in the conference call, we are going to create a new doc
on the ext4 wiki dedicated to track the development of some of the
features needed in e2fsprogs.  The page will consist of mostly changes
needed in order to keep e2fsprogs up to date with mainline ext4 kernel
code.

I don't plan to add bug fixes, cleanup or trivial changes to the page
as this would make it hard to keep the page up to date.  The link to
the page will be:

http://ext4.wiki.kernel.org/index.php?title=E2fsprogs_features_and_patches&action=edit

Comments on what you would like to see on this page or in the initial
list of features I have gathered below are welcome.


Undo I/O manager:
-Patches submitted to the mailing list by Aneesh Kumar
-To be merged after 1.40
http://thread.gmane.org/gmane.comp.file-systems.ext4/2826

64bit blk Support:
- Patches submitted to the mailing list by Valerie Clement
- Requires different binaries for doing 64bit support.
- Unified binaries will most likely require API/ABI changes.
- 64bit blk support requires 64bit binaries.  Breaks on PPC64
32bit user-space env.
- Ted Ts'o suggests a different approach:
  http://thread.gmane.org/gmane.comp.file-systems.ext4/2845
http://thread.gmane.org/gmane.comp.file-systems.ext4/2125

Uninitialized block groups:
- Girish Shilamkar recently submitted a new patch to the mailing list
- Kernel patches not in mainline yet.  What's the hold up here?
- No archive link yet.

i_version support:
- Kernel patches not in mainline yet.  Ongoing discussions.
- Patches submitted?

ext4migrate:
- Patches submitted to the mailing list by Aneesh Kumar
http://thread.gmane.org/gmane.comp.file-systems.ext4/1395

Extents support:
- Patches submitted?

Nano second support:
- Patches submitted?

Greater than 32000 subdir support:
- Girish Shilamkar recently submitted a new patch to the mailing list
- No archive link yet.


-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Request for direction on changes required in e2fsprog.

2007-07-23 Thread Jose R. Santos
On Mon, 23 Jul 2007 09:32:50 -0400
Theodore Tso <[EMAIL PROTECTED]> wrote:

> I've cc'ed the linux-ext4 mailing list since a lot of this is about
> code cleanliness and coding style of e2fsprogs.  Yes, some of this
> probably should be written up in Documentation/CodingStyle file in
> e2fsprogs, since in general it's much like the kernel CodingStyle,
> except that I care a lot more about ABI and API backwards
> compatibility, since e2fsprogs exports a userspace shared library.
> 
>  -
> Ted
> 
> On Fri, Jul 20, 2007 at 11:25:49AM -0500, Jose R. Santos wrote:
> > 
> > As you mentioned in one of the interlock meetings a couple of weeks
> > ago, you said that you don't have that big of a problem changing
> > parts of the libe2fs API/ABI as long as we only break it once.
> 
> I said that we can break it at _most_ once.  But because I believe in
> doing incremental coding, I'd much rather try not to break it at all,
> and then in the worst case, break things only once.  Let me quote from
> Linus Torvalds from a recent posting he made on the git mailing list.
> 
> LT >(Most of the time I actually try to get it right the first time. It's
> LT >actually become a challenge to me to notice when some change needs a
> LT >cleanup first in order to make the later changes much easier, so I really
> LT >*like* trying to actually do the actual development in a logical order:
> LT >first re-organize the code, and verify that the re-organized code works
> LT >identically to the old one, then commit that, then start actually working
> LT >on the new feature with the now cleaner code-base).
> LT >
> LT >And no, I didn't start out programming that way. But when you get used to
> LT >looking at changes as a nice series of independent commits in emails, you
> LT >really start _working_ that way yourself. And I'm 100% convinced that it
> LT >actually makes you a better programmer too.
> 
> This is why I **really** dislike mongo patches such as the 64-bit
> patches from that have come out so far.  They change way too much, and
> afterwards it's very hard to see what the heck the patch actually
> *does*.  I am convinced that if we do a better job of breaking up
> patches both for e2fsprogs and for the ext4 patch queue, it will make
> it a lot easier for people to review patches.  Each patch should in
> the ideal world only do one thing.

Agree
 
> So for example, take a look at some of the patches which I just
> commited into the git "master" branch last night (Sunday night).  What
> you are seeing there are all cleanup patches.  Each of them are
> relatively small; none of them change the ABI/API; and after each of
> them e2fsprogs passes the "make check" regression test suite, so the
> whole thing is git bisectable.
> 
> All of these changes was to move any 32-bit bitmap "knowledge" out of
> inline functions, and into the file gen_bitmap.c.  Of course, I had to
> preserve any functions that had been previously called by inline
> functions, as well as anything that had been exported as part of the
> ABI.  But that's easy to do.
> 
> The next step, which I haven't done yet (and probably won't have time
> to do for at least a day or two thanks to my needing to do very Unfun
> things like Fall Plan stuff, as opposed to Fun stuff like e2fsprogs
> hacking :-) is to create a new set of interfaces that look somewhat
> like this:
> 
> int ext2fs_mark_block_nbitmap(ext2fs_block_bitmap bitmap, blk64_t block);
> int ext2fs_unmark_block_nbitmap(ext2fs_block_bitmap bitmap, blk64_t block);
> int ext2fs_test_block_nbitmap(ext2fs_block_bitmap bitmap, blk64_t block);
> 
> int ext2fs_mark_inode_nbitmap(ext2fs_block_bitmap bitmap, ino64_t inode);
> int ext2fs_unmark_inode_nbitmap(ext2fs_block_bitmap bitmap, ino64_t inode);
> int ext2fs_test_inode_nbitmap(ext2fs_block_bitmap bitmap, ino64_t inode);
> 
> And then rearrange the structure definitions like so:
> 
> in ext2fs.h:
> 
> /* Redefined from original values in ext2fs.h */
> typedef struct ext2fs_struct_nbitmap *ext2fs_generic_bitmap;
> typedef struct ext2fs_struct_inode_bitmap *ext2fs_inode_bitmap;
> typedef struct ext2fs_struct_block_bitmap *ext2fs_block_bitmap;
> 
> (No, we never define ext2fs_struct_block_bitmap and
> ext2fs_struct_inode_bitmap; after doing the appropriate structure
> magic number checking, we cast it to ext2fs_struct_nbitmap and then
> use the nbitmap functions for common handling of the inode and block
> bitmaps.  The two different structures are there just to allow the 
> compiler to enforce proper type-

Re: Initial results of FLEX_BG feature.

2007-07-16 Thread Jose R. Santos
On Mon, 16 Jul 2007 00:34:57 -0600
Andreas Dilger <[EMAIL PROTECTED]> wrote:

> On Jul 12, 2007  10:09 -0500, Jose R. Santos wrote:
> > @@ -1271,6 +1271,9 @@ static int ext4_check_descriptors (struc
> >  
> > ext4_debug ("Checking group descriptors");
> >  
> > +   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
> > +   return 1;
> > +
> > for (i = 0; i < sbi->s_groups_count; i++)
> > {
> > if (i == sbi->s_groups_count - 1)
> 
> It looks pretty straight forward to just change this code to leave
> first_block at s_first_data_block, and leave last_block at ext4_blocks_count()
> if FLEX_BG is set.

Sure.  I'll add that.
 
> Even with FLEX_BG we want to keep the group metadata within the bounds of
> the filesystem.

Eventually, I want to be able to export the number of groups per flex
group so that we can correctly calculate where the bounds of each block
group's metadata should be.

> Cheers, Andreas
> --
> Andreas Dilger
> Principal Software Engineer
> Cluster File Systems, Inc.
> 

-JRS
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Initial results of FLEX_BG feature.

2007-07-12 Thread Jose R. Santos
On Wed, 11 Jul 2007 18:14:25 -0400
Theodore Tso <[EMAIL PROTECTED]> wrote:

> On Wed, Jul 11, 2007 at 12:30:04AM -0500, Jose R. Santos wrote:
> > Right now what I've done is allocate the bitmaps and inode tables at the
> > beginning of each group of 64 BG.  Still need to work on fsck since just
> > removing the restriction on where the bitmaps and inode table are
> > located still gives me errors of uninitialized inodes with dtime set.
> > Seems like fsck still expects inode information to be located at
> > specific locations within the disk.
> 
> Can you send me the patch which you were playing with?  I might be
> able to help you with this.  It should be pretty straightforward to
> remove the constraint on the inode table location.  

Here is the kernel piece.

-JRS

---
 fs/ext4/super.c |3 3 + 0 - 0 !
 include/linux/ext4_fs.h |4 3 + 1 - 0 !
 2 files changed, 6 insertions(+), 1 deletion(-)

Index: linux-2.6/fs/ext4/super.c
===
--- linux-2.6.orig/fs/ext4/super.c  2007-07-11 15:34:58.0 -0500
+++ linux-2.6/fs/ext4/super.c   2007-07-11 16:19:08.0 -0500
@@ -1271,6 +1271,9 @@ static int ext4_check_descriptors (struc
 
ext4_debug ("Checking group descriptors");
 
+   if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+   return 1;
+
for (i = 0; i < sbi->s_groups_count; i++)
{
if (i == sbi->s_groups_count - 1)
Index: linux-2.6/include/linux/ext4_fs.h
===
--- linux-2.6.orig/include/linux/ext4_fs.h  2007-07-11 15:34:58.0 
-0500
+++ linux-2.6/include/linux/ext4_fs.h   2007-07-12 09:58:51.0 -0500
@@ -698,13 +698,15 @@ static inline int ext4_valid_inum(struct
 #define EXT4_FEATURE_INCOMPAT_META_BG  0x0010
 #define EXT4_FEATURE_INCOMPAT_EXTENTS  0x0040 /* extents support */
 #define EXT4_FEATURE_INCOMPAT_64BIT0x0080
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG  0x0200
 
 #define EXT4_FEATURE_COMPAT_SUPP   EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
 EXT4_FEATURE_INCOMPAT_RECOVER| \
 EXT4_FEATURE_INCOMPAT_META_BG| \
 EXT4_FEATURE_INCOMPAT_EXTENTS| \
-EXT4_FEATURE_INCOMPAT_64BIT)
+EXT4_FEATURE_INCOMPAT_64BIT| \
+EXT4_FEATURE_INCOMPAT_FLEX_BG)
 #define EXT4_FEATURE_RO_COMPAT_SUPP(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \



-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Initial results of FLEX_BG feature.

2007-07-12 Thread Jose R. Santos
On Wed, 11 Jul 2007 18:14:25 -0400
Theodore Tso <[EMAIL PROTECTED]> wrote:
> On Wed, Jul 11, 2007 at 12:30:04AM -0500, Jose R. Santos wrote:
> > Right now what I've done is allocate the bitmaps and inode tables at the
> > beginning of each group of 64 BG.  Still need to work on fsck since just
> > removing the restriction on where the bitmaps and inode table are
> > located still gives me errors of uninitialized inodes with dtime set.
> > Seems like fsck still expects inode information to be located at
> > specific locations within the disk.
> 
> Can you send me the patch which you were playing with?  I might be
> able to help you with this.  It should be pretty straightforward to
> remove the constraint on the inode table location.  
> 
> It really should only be a check in e2fsck/super.c:check_super_block(),
> as far as I know.
> 
> If you're seeing errors of uninitialized inodes with dtime set, that
> sounds like maybe something else is going on.  All of e2fsprogs should
> be referencing the inode table via fs->group_desc[group_num].bg_inode_table.  
> See lib/ext2fs/inode.c, functions ext2fs_open_inode_scan(), 
> get_next_blockgroup(), and ext2fs_read_inode_full().
> 
>   - Ted

Here is a very rough patch of the FLEX_BG feature implementation.
It still works as a prototype, but there are a couple of things that are
either broken or hard coded.  As it currently stands, it cannot be used
to create a filesystem without the FLEX_BG feature, as I have not made
ext2fs_allocate_tables() backward compatible.

The number of groups per flex group is also hard coded to 64.  I am still
thinking about whether I should add this to the super block itself in
order to help recovery of the filesystem as well as possibly making
allocation algorithms in the kernel aware of the new group
arrangement.

I create a filesystem using the following command:

mke2fs -j -O meta_bg,flex_bg  /dev/sdh

While meta_bg is not required, having block group descriptors spread
across the multiple block groups does increase the chances of
fragmenting the meta data.

-JRS

diff -Naurp e2fsprogs-1.40/e2fsck/super.c 
/home/jsantos/e2fsprogs-1.40-flex/e2fsck/super.c
--- e2fsprogs-1.40/e2fsck/super.c   2007-06-03 23:48:01.0 -0500
+++ /home/jsantos/e2fsprogs-1.40-flex/e2fsck/super.c2007-07-09 
11:27:56.0 -0500
@@ -580,27 +580,31 @@ void check_super_block(e2fsck_t ctx)
 
first_block = ext2fs_group_first_block(fs, i);
last_block = ext2fs_group_last_block(fs, i);
-
+/*
if ((gd->bg_block_bitmap < first_block) ||
(gd->bg_block_bitmap > last_block)) {
pctx.blk = gd->bg_block_bitmap;
if (fix_problem(ctx, PR_0_BB_NOT_GROUP, &pctx))
gd->bg_block_bitmap = 0;
}
+*/
if (gd->bg_block_bitmap == 0) {
ctx->invalid_block_bitmap_flag[i]++;
ctx->invalid_bitmaps++;
}
+/*
if ((gd->bg_inode_bitmap < first_block) ||
(gd->bg_inode_bitmap > last_block)) {
pctx.blk = gd->bg_inode_bitmap;
if (fix_problem(ctx, PR_0_IB_NOT_GROUP, &pctx))
gd->bg_inode_bitmap = 0;
}
+*/
if (gd->bg_inode_bitmap == 0) {
ctx->invalid_inode_bitmap_flag[i]++;
ctx->invalid_bitmaps++;
}
+/*
if ((gd->bg_inode_table < first_block) ||
((gd->bg_inode_table +
  fs->inode_blocks_per_group - 1) > last_block)) {
@@ -608,6 +612,7 @@ void check_super_block(e2fsck_t ctx)
if (fix_problem(ctx, PR_0_ITABLE_NOT_GROUP, &pctx))
gd->bg_inode_table = 0;
}
+*/
if (gd->bg_inode_table == 0) {
ctx->invalid_inode_table_flag[i]++;
ctx->invalid_bitmaps++;
diff -Naurp e2fsprogs-1.40/lib/e2p/feature.c 
/home/jsantos/e2fsprogs-1.40-flex/lib/e2p/feature.c
--- e2fsprogs-1.40/lib/e2p/feature.c2007-03-21 15:46:10.0 -0500
+++ /home/jsantos/e2fsprogs-1.40-flex/lib/e2p/feature.c 2007-07-10 
15:21:25.0 -0500
@@ -67,6 +67,8 @@ static struct feature feature_list[] = {
"extent" },
{   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_64BIT,
"64bit" },
+   {   E2P_FEATURE_INCOMPAT, EXT4_FEATURE_INCOMPAT_FLEX_BG,
+   "flex_bg"},
{   0, 0, 0 },
 };
 
diff -Naurp e2fsprogs-1.40/lib/ext2fs/alloc_tables.c 
/home/jsantos/e2fsp

Re: [EXT4 set 2][PATCH 5/5] cleanups: Export jbd2-debug via debugfs

2007-07-11 Thread Jose R. Santos
The jbd2-debug file used to be located in /proc/sys/fs/jbd2-debug, but
create_proc_entry() does not do lookups on file names that are more than one
directory deep.  This causes the entry creation to fail and hence, no proc
file is created.

Instead of fixing this in procfs, we might as well move the jbd2-debug file to
debugfs, which would be the preferred location for this kind of tunable.  The
new location is now /sys/kernel/debug/jbd2/jbd2-debug.


Signed-off-by: Jose R. Santos <[EMAIL PROTECTED]>
---
 fs/Kconfig   |   105 + 5 - 0 !
 fs/jbd2/journal.c|   6727 +40 -0 !
 include/linux/jbd2.h |21 + 1 - 0 !
 3 files changed, 33 insertions(+), 46 deletions(-)

Index: linux-2.6/fs/jbd2/journal.c
===
--- linux-2.6.orig/fs/jbd2/journal.c2007-07-11 09:46:25.0 -0500
+++ linux-2.6/fs/jbd2/journal.c 2007-07-11 11:31:30.0 -0500
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1951,64 +1952,50 @@ void jbd2_journal_put_journal_head(struc
 }
 
 /*
- * /proc tunables
+ * debugfs tunables
  */
 #if defined(CONFIG_JBD2_DEBUG)
-int jbd2_journal_enable_debug;
+u8 jbd2_journal_enable_debug;
 EXPORT_SYMBOL(jbd2_journal_enable_debug);
 #endif
 
-#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_PROC_FS)
+#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS)
 
-static struct proc_dir_entry *proc_jbd_debug;
+#define JBD2_DEBUG_NAME "jbd2-debug"
 
-static int read_jbd_debug(char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
-   int ret;
+struct dentry *jbd2_debugfs_dir, *jbd2_debug;
 
-   ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug);
-   *eof = 1;
-   return ret;
+static void __init jbd2_create_debugfs_entry(void)
+{
+   jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
+   if (jbd2_debugfs_dir)
+   jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO,
+  jbd2_debugfs_dir,
+  &jbd2_journal_enable_debug);
 }
 
-static int write_jbd_debug(struct file *file, const char __user *buffer,
-  unsigned long count, void *data)
+static void __exit jbd2_remove_debugfs_entry(void)
 {
-   char buf[32];
-
-   if (count > ARRAY_SIZE(buf) - 1)
-   count = ARRAY_SIZE(buf) - 1;
-   if (copy_from_user(buf, buffer, count))
-   return -EFAULT;
-   buf[ARRAY_SIZE(buf) - 1] = '\0';
-   jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10);
-   return count;
+   if (jbd2_debug)
+   debugfs_remove(jbd2_debug);
+   if (jbd2_debugfs_dir)
+   debugfs_remove(jbd2_debugfs_dir);
 }
 
-#define JBD_PROC_NAME "sys/fs/jbd2-debug"
+#else
 
-static void __init create_jbd_proc_entry(void)
+static void __init jbd2_create_debugfs_entry(void)
 {
-   proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-   if (proc_jbd_debug) {
-   /* Why is this so hard? */
-   proc_jbd_debug->read_proc = read_jbd_debug;
-   proc_jbd_debug->write_proc = write_jbd_debug;
-   }
+   do {
+   } while (0);
 }
 
-static void __exit jbd2_remove_jbd_proc_entry(void)
+static void __exit jbd2_remove_debugfs_entry(void)
 {
-   if (proc_jbd_debug)
-   remove_proc_entry(JBD_PROC_NAME, NULL);
+   do {
+   } while (0);
 }
 
-#else
-
-#define create_jbd_proc_entry() do {} while (0)
-#define jbd2_remove_jbd_proc_entry() do {} while (0)
-
 #endif
 
 struct kmem_cache *jbd2_handle_cache;
@@ -2067,7 +2054,7 @@ static int __init journal_init(void)
ret = journal_init_caches();
if (ret != 0)
jbd2_journal_destroy_caches();
-   create_jbd_proc_entry();
+   jbd2_create_debugfs_entry();
return ret;
 }
 
@@ -2078,7 +2065,7 @@ static void __exit journal_exit(void)
if (n)
printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
 #endif
-   jbd2_remove_jbd_proc_entry();
+   jbd2_remove_debugfs_entry();
jbd2_journal_destroy_caches();
 }
 
Index: linux-2.6/include/linux/jbd2.h
===
--- linux-2.6.orig/include/linux/jbd2.h 2007-07-11 09:46:25.0 -0500
+++ linux-2.6/include/linux/jbd2.h  2007-07-11 10:37:06.0 -0500
@@ -57,7 +57,7 @@
  * CONFIG_JBD2_DEBUG is on.
  */
 #define JBD_EXPENSIVE_CHECKING
-extern int jbd2_journal_enable_debug;
+extern u8 jbd2_journal_enable_debug;
 
 #define jbd_debug(n, f, a...)  \
do {\
Index: linux-2.6/fs/Kconfig
=

  1   2   >