[patch 135/208] iget: stop EXT2 from using iget() and read_inode()

2008-02-07 Thread akpm
From: David Howells [EMAIL PROTECTED]

Stop the EXT2 filesystem from using iget() and read_inode().  Replace
ext2_read_inode() with ext2_iget(), and call that instead of iget(). 
ext2_iget() then uses iget_locked() directly and returns a proper error code
instead of an inode in the event of an error.

ext2_fill_super() returns any error incurred when getting the root inode
instead of EINVAL.

[EMAIL PROTECTED]: coding-style fixes]
Signed-off-by: David Howells [EMAIL PROTECTED]
Acked-by: Theodore Ts'o [EMAIL PROTECTED]
Cc: linux-ext4@vger.kernel.org
Acked-by: Christoph Hellwig [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 fs/ext2/ext2.h  |2 +-
 fs/ext2/inode.c |   29 +
 fs/ext2/namei.c |   12 ++--
 fs/ext2/super.c |   32 ++--
 4 files changed, 46 insertions(+), 29 deletions(-)

diff -puN fs/ext2/ext2.h~iget-stop-ext2-from-using-iget-and-read_inode-try 
fs/ext2/ext2.h
--- a/fs/ext2/ext2.h~iget-stop-ext2-from-using-iget-and-read_inode-try
+++ a/fs/ext2/ext2.h
@@ -124,7 +124,7 @@ extern void ext2_check_inodes_bitmap (st
 extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
 
 /* inode.c */
-extern void ext2_read_inode (struct inode *);
+extern struct inode *ext2_iget (struct super_block *, unsigned long);
 extern int ext2_write_inode (struct inode *, int);
 extern void ext2_put_inode (struct inode *);
 extern void ext2_delete_inode (struct inode *);
diff -puN fs/ext2/inode.c~iget-stop-ext2-from-using-iget-and-read_inode-try 
fs/ext2/inode.c
--- a/fs/ext2/inode.c~iget-stop-ext2-from-using-iget-and-read_inode-try
+++ a/fs/ext2/inode.c
@@ -1181,22 +1181,33 @@ void ext2_get_inode_flags(struct ext2_in
ei-i_flags |= EXT2_DIRSYNC_FL;
 }
 
-void ext2_read_inode (struct inode * inode)
+struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 {
-   struct ext2_inode_info *ei = EXT2_I(inode);
-   ino_t ino = inode-i_ino;
+   struct ext2_inode_info *ei;
struct buffer_head * bh;
-   struct ext2_inode * raw_inode = ext2_get_inode(inode-i_sb, ino, bh);
+   struct ext2_inode *raw_inode;
+   struct inode *inode;
+   long ret = -EIO;
int n;
 
+   inode = iget_locked(sb, ino);
+   if (!inode)
+   return ERR_PTR(-ENOMEM);
+   if (!(inode-i_state  I_NEW))
+   return inode;
+
+   ei = EXT2_I(inode);
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
ei-i_acl = EXT2_ACL_NOT_CACHED;
ei-i_default_acl = EXT2_ACL_NOT_CACHED;
 #endif
ei-i_block_alloc_info = NULL;
 
-   if (IS_ERR(raw_inode))
+   raw_inode = ext2_get_inode(inode-i_sb, ino, bh);
+   if (IS_ERR(raw_inode)) {
+   ret = PTR_ERR(raw_inode);
goto bad_inode;
+   }
 
inode-i_mode = le16_to_cpu(raw_inode-i_mode);
inode-i_uid = (uid_t)le16_to_cpu(raw_inode-i_uid_low);
@@ -1220,6 +1231,7 @@ void ext2_read_inode (struct inode * ino
if (inode-i_nlink == 0  (inode-i_mode == 0 || ei-i_dtime)) {
/* this inode is deleted */
brelse (bh);
+   ret = -ESTALE;
goto bad_inode;
}
inode-i_blocks = le32_to_cpu(raw_inode-i_blocks);
@@ -1286,11 +1298,12 @@ void ext2_read_inode (struct inode * ino
}
brelse (bh);
ext2_set_inode_flags(inode);
-   return;
+   unlock_new_inode(inode);
+   return inode;

 bad_inode:
-   make_bad_inode(inode);
-   return;
+   iget_failed(inode);
+   return ERR_PTR(ret);
 }
 
 static int ext2_update_inode(struct inode * inode, int do_sync)
diff -puN fs/ext2/namei.c~iget-stop-ext2-from-using-iget-and-read_inode-try 
fs/ext2/namei.c
--- a/fs/ext2/namei.c~iget-stop-ext2-from-using-iget-and-read_inode-try
+++ a/fs/ext2/namei.c
@@ -63,9 +63,9 @@ static struct dentry *ext2_lookup(struct
ino = ext2_inode_by_name(dir, dentry);
inode = NULL;
if (ino) {
-   inode = iget(dir-i_sb, ino);
-   if (!inode)
-   return ERR_PTR(-EACCES);
+   inode = ext2_iget(dir-i_sb, ino);
+   if (IS_ERR(inode))
+   return ERR_CAST(inode);
}
return d_splice_alias(inode, dentry);
 }
@@ -83,10 +83,10 @@ struct dentry *ext2_get_parent(struct de
ino = ext2_inode_by_name(child-d_inode, dotdot);
if (!ino)
return ERR_PTR(-ENOENT);
-   inode = iget(child-d_inode-i_sb, ino);
+   inode = ext2_iget(child-d_inode-i_sb, ino);
 
-   if (!inode)
-   return ERR_PTR(-EACCES);
+   if (IS_ERR(inode))
+   return ERR_CAST(inode);
parent = d_alloc_anon(inode);
if (!parent) {
iput(inode);
diff -puN fs/ext2/super.c~iget-stop-ext2-from-using-iget-and-read_inode-try 
fs/ext2/super.c
--- a/fs/ext2/super.c~iget-stop-ext2-from-using-iget-and-read_inode-try
+++ a/fs/ext2/super.c
@@ 

[patch 137/208] iget: stop EXT4 from using iget() and read_inode()

2008-02-07 Thread akpm
From: David Howells [EMAIL PROTECTED]

Stop the EXT4 filesystem from using iget() and read_inode().  Replace
ext4_read_inode() with ext4_iget(), and call that instead of iget(). 
ext4_iget() then uses iget_locked() directly and returns a proper error code
instead of an inode in the event of an error.

ext4_fill_super() returns any error incurred when getting the root inode
instead of EINVAL.

Signed-off-by: David Howells [EMAIL PROTECTED]
Acked-by: Theodore Ts'o [EMAIL PROTECTED]
Acked-by: Jan Kara [EMAIL PROTECTED]
Cc: linux-ext4@vger.kernel.org
Acked-by: Christoph Hellwig [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
---

 fs/ext4/ialloc.c|   58 ++
 fs/ext4/inode.c |   25 
 fs/ext4/namei.c |   29 +--
 fs/ext4/resize.c|7 +---
 fs/ext4/super.c |   36 +--
 include/linux/ext4_fs.h |2 -
 6 files changed, 87 insertions(+), 70 deletions(-)

diff -puN fs/ext4/ialloc.c~iget-stop-ext4-from-using-iget-and-read_inode-try 
fs/ext4/ialloc.c
--- a/fs/ext4/ialloc.c~iget-stop-ext4-from-using-iget-and-read_inode-try
+++ a/fs/ext4/ialloc.c
@@ -782,14 +782,15 @@ struct inode *ext4_orphan_get(struct sup
unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)-s_es-s_inodes_count);
ext4_group_t block_group;
int bit;
-   struct buffer_head *bitmap_bh = NULL;
+   struct buffer_head *bitmap_bh;
struct inode *inode = NULL;
+   long err = -EIO;
 
/* Error cases - e2fsck has already cleaned up for us */
if (ino  max_ino) {
ext4_warning(sb, __FUNCTION__,
 bad orphan ino %lu!  e2fsck was run?, ino);
-   goto out;
+   goto error;
}
 
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -798,38 +799,49 @@ struct inode *ext4_orphan_get(struct sup
if (!bitmap_bh) {
ext4_warning(sb, __FUNCTION__,
 inode bitmap error for orphan %lu, ino);
-   goto out;
+   goto error;
}
 
/* Having the inode bit set should be a 100% indicator that this
 * is a valid orphan (no e2fsck run on fs).  Orphans also include
 * inodes that were being truncated, so we can't check i_nlink==0.
 */
-   if (!ext4_test_bit(bit, bitmap_bh-b_data) ||
-   !(inode = iget(sb, ino)) || is_bad_inode(inode) ||
-   NEXT_ORPHAN(inode)  max_ino) {
-   ext4_warning(sb, __FUNCTION__,
-bad orphan inode %lu!  e2fsck was run?, ino);
-   printk(KERN_NOTICE ext4_test_bit(bit=%d, block=%llu) = %d\n,
-  bit, (unsigned long long)bitmap_bh-b_blocknr,
-  ext4_test_bit(bit, bitmap_bh-b_data));
-   printk(KERN_NOTICE inode=%p\n, inode);
-   if (inode) {
-   printk(KERN_NOTICE is_bad_inode(inode)=%d\n,
-  is_bad_inode(inode));
-   printk(KERN_NOTICE NEXT_ORPHAN(inode)=%u\n,
-  NEXT_ORPHAN(inode));
-   printk(KERN_NOTICE max_ino=%lu\n, max_ino);
-   }
+   if (!ext4_test_bit(bit, bitmap_bh-b_data))
+   goto bad_orphan;
+
+   inode = ext4_iget(sb, ino);
+   if (IS_ERR(inode))
+   goto iget_failed;
+
+   if (NEXT_ORPHAN(inode)  max_ino)
+   goto bad_orphan;
+   brelse(bitmap_bh);
+   return inode;
+
+iget_failed:
+   err = PTR_ERR(inode);
+   inode = NULL;
+bad_orphan:
+   ext4_warning(sb, __FUNCTION__,
+bad orphan inode %lu!  e2fsck was run?, ino);
+   printk(KERN_NOTICE ext4_test_bit(bit=%d, block=%llu) = %d\n,
+  bit, (unsigned long long)bitmap_bh-b_blocknr,
+  ext4_test_bit(bit, bitmap_bh-b_data));
+   printk(KERN_NOTICE inode=%p\n, inode);
+   if (inode) {
+   printk(KERN_NOTICE is_bad_inode(inode)=%d\n,
+  is_bad_inode(inode));
+   printk(KERN_NOTICE NEXT_ORPHAN(inode)=%u\n,
+  NEXT_ORPHAN(inode));
+   printk(KERN_NOTICE max_ino=%lu\n, max_ino);
/* Avoid freeing blocks if we got a bad deleted inode */
-   if (inode  inode-i_nlink == 0)
+   if (inode-i_nlink == 0)
inode-i_blocks = 0;
iput(inode);
-   inode = NULL;
}
-out:
brelse(bitmap_bh);
-   return inode;
+error:
+   return ERR_PTR(err);
 }
 
 unsigned long ext4_count_free_inodes (struct super_block * sb)
diff -puN fs/ext4/inode.c~iget-stop-ext4-from-using-iget-and-read_inode-try 
fs/ext4/inode.c
--- a/fs/ext4/inode.c~iget-stop-ext4-from-using-iget-and-read_inode-try
+++ a/fs/ext4/inode.c
@@ -2680,21 +2680,31 @@ static blkcnt_t 

Re: BUG_ON at mballoc.c:3752

2008-02-07 Thread Aneesh Kumar K.V
On Wed, Feb 06, 2008 at 03:59:48PM -0600, Dave Kleikamp wrote:
 
 File systems should not call BUG() due to a corrupt file system.
 Instead the code should fail the operation, possibly marking the file
 system read-only (or panicking) depending on the errors= mount option.
 

Eric Sandeen explained me the same on IRC. I was busy with the migrate
locking bug. That's why i didn't update here. Today i tried to reproduce
the problem using the image provided. But in my case it is not hitting
the BUG_ON (mostly due to single cpu). I did look at the code and am not
still not clear how we can hit that BUG_ON. prealloc free space pa_free is
generated out of bitmap. So only if something corrupted bitmap after we
initialized prealloc space we will hit this case. In mballoc we error out
if the block allocated or fall in system zone. One thing i noticed is,
the journal is corrupt. So the only possibility that i have is journal write
resulted in bitmap corruption.

I also looked at the mballoc to make sure we don't panic in case of a
corrupt bitmap. Below is the patch that i have now. This one is yet to
go through the ABAT test but it would be nice to see whether the below
change cause any other issues.

Eric ,
can you run the test with below patch and see if this makes any
difference ?. I know we are not fixing any bugs in the below patch.


ext4: Don't panic in case of corrupt bitmap

From: Aneesh Kumar K.V [EMAIL PROTECTED]

Multiblock allocator was calling BUG_ON in many case if the free and used
blocks count obtained looking at the bitmap is different from what
the allocator internally accounted for. Use ext4_error in such case
and don't panic the system.

Signed-off-by: Aneesh Kumar K.V [EMAIL PROTECTED]
---

 fs/ext4/mballoc.c |   35 +--
 1 files changed, 21 insertions(+), 14 deletions(-)


diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 06d1f52..656729b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -680,7 +680,6 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int 
order, int *max)
 {
char *bb;
 
-   /* FIXME!! is this needed */
BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
BUG_ON(max == NULL);
 
@@ -964,7 +963,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
grp-bb_fragments = fragments;
 
if (free != grp-bb_free) {
-   printk(KERN_DEBUG
+   ext4_error(sb, __FUNCTION__,
EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n,
group, free, grp-bb_free);
grp-bb_free = free;
@@ -1821,13 +1820,24 @@ static void ext4_mb_complex_scan_group(struct 
ext4_allocation_context *ac,
i = ext4_find_next_zero_bit(bitmap,
EXT4_BLOCKS_PER_GROUP(sb), i);
if (i = EXT4_BLOCKS_PER_GROUP(sb)) {
-   BUG_ON(free != 0);
+   /*
+* IF we corrupt the bitmap  we won't find any
+* free blocks even though group info says we
+* we have free blocks
+*/
+   ext4_error(sb, __FUNCTION__, %d free blocks as per 
+   group info. But bitmap says 0\n,
+   free);
break;
}
 
mb_find_extent(e4b, 0, i, ac-ac_g_ex.fe_len, ex);
BUG_ON(ex.fe_len = 0);
-   BUG_ON(free  ex.fe_len);
+   if (free  ex.fe_len) {
+   ext4_error(sb, __FUNCTION__, %d free blocks as per 
+   group info. But got %d blocks\n,
+   free, ex.fe_len);
+   }
 
ext4_mb_measure_extent(ac, ex, e4b);
 
@@ -3354,13 +3364,10 @@ static void ext4_mb_use_group_pa(struct 
ext4_allocation_context *ac,
ac-ac_pa = pa;
 
/* we don't correct pa_pstart or pa_plen here to avoid
-* possible race when tte group is being loaded concurrently
+* possible race when the group is being loaded concurrently
 * instead we correct pa later, after blocks are marked
-* in on-disk bitmap -- see ext4_mb_release_context() */
-   /*
-* FIXME!! but the other CPUs can look at this particular
-* pa and think that it have enought free blocks if we
-* don't update pa_free here right ?
+* in on-disk bitmap -- see ext4_mb_release_context()
+* Other CPUs are prevented from allocating from this pa by lg_mutex
 */
mb_debug(use %u/%u from group pa %p\n, pa-pa_lstart-len, len, pa);
 }
@@ -3743,13 +3750,13 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy 
*e4b,
bit = next + 1;
}
if (free != pa-pa_free) {
-   printk(KERN_ERR pa %p: logic %lu, phys. %lu, len %lu\n,
+  

Re: [PATCH 0/3] Move fsck from e2fsprogs to util-linux-ng

2008-02-07 Thread Theodore Tso
On Thu, Feb 07, 2008 at 01:37:43AM +0100, Kay Sievers wrote:
 
 Care to explain what ext4 development has to do with the generic
 fsck program?  I don''t see any convincing reason not to move that
 now.

Fsck and mount (and especially mount) needs to be linked against the
very latest blkid library in order to make sure things work, and when
we spread out packages across the various different packages, it means
updating things to assure correct functionality is that much harder.

It also means getting the right ext4 detection routines into vol_id,
and in the past I've had real problems getting patches that fix real
bugs in vol_id past you, and I'm just too busy right now to deal with
that right now.

- Ted
-
To unsubscribe from this list: send the line unsubscribe linux-ext4 in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] e2fsprogs: New bitmap and inode table allocation for FLEX_BG

2008-02-07 Thread Jose R. Santos
New bitmap and inode table allocation for FLEX_BG

From: Jose R. Santos [EMAIL PROTECTED]

Change the way we allocate bitmaps and inode tables if the FLEX_BG
feature is used at mke2fs time.  It places calculates a new offset for
bitmaps and inode table base on the number of groups that the user
wishes to pack together using the new -G option.  Creating a
filesystem with 64 block groups in a flex group can be done by:

mke2fs -j -I 256 -O flex_bg -G 32 /dev/sdX

Signed-off-by: Jose R. Santos [EMAIL PROTECTED]
Signed-off-by: Valerie Clement [EMAIL PROTECTED]
---

 lib/ext2fs/alloc_tables.c |  116 -
 lib/ext2fs/closefs.c  |6 ++
 lib/ext2fs/ext2_fs.h  |6 ++
 lib/ext2fs/initialize.c   |6 ++
 misc/mke2fs.c |   25 +-
 5 files changed, 151 insertions(+), 8 deletions(-)


diff --git a/lib/ext2fs/alloc_tables.c b/lib/ext2fs/alloc_tables.c
index 4ad2ba9..8281858 100644
--- a/lib/ext2fs/alloc_tables.c
+++ b/lib/ext2fs/alloc_tables.c
@@ -27,18 +27,82 @@
 #include ext2_fs.h
 #include ext2fs.h
 
+void ext2fs_bgd_set_flex_meta_flag(ext2_filsys fs, blk_t block)
+{
+   dgrp_t  group;
+
+   group = ext2fs_group_of_blk(fs, block);
+   if (!(fs-group_desc[group].bg_flags  EXT2_BG_FLEX_METADATA))
+   fs-group_desc[group].bg_flags |= EXT2_BG_FLEX_METADATA;
+}
+
+blk_t ext2fs_flexbg_offset(ext2_filsys fs, dgrp_t group, blk_t start_blk,
+  ext2fs_block_bitmap bmap, int offset, int size)
+{
+   int flexbg, flexbg_size, elem_size;
+   blk_t   last_blk, first_free = 0;
+   dgrp_t  last_grp;
+
+   flexbg_size = 1  fs-super-s_log_groups_per_flex;
+   flexbg = group / flexbg_size;
+
+   if (size  fs-super-s_blocks_per_group / 8)
+   size = fs-super-s_blocks_per_group / 8;
+
+   /*
+* Dont do a long search if the previous block
+* search is still valid.
+*/
+   if (start_blk  group % flexbg_size) {
+   if (size  flexbg_size)
+   elem_size = fs-inode_blocks_per_group;
+   else
+   elem_size = 1;
+   if (ext2fs_test_block_bitmap_range(bmap, start_blk + elem_size,
+  size))
+   return start_blk + elem_size;
+   }
+
+   start_blk = ext2fs_group_first_block(fs, flexbg_size * flexbg);
+   last_grp = (group + (flexbg_size - (group % flexbg_size) - 1));
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   if (last_grp  fs-group_desc_count)
+   last_grp = fs-group_desc_count;
+
+   /* Find the first available block */
+   if (ext2fs_get_free_blocks(fs, start_blk, last_blk, 1, bmap,
+  first_free))
+   return first_free;
+
+   if (ext2fs_get_free_blocks(fs, first_free + offset, last_blk, size,
+  bmap, first_free))
+   return first_free;
+
+   return first_free;
+}
+
 errcode_t ext2fs_allocate_group_table(ext2_filsys fs, dgrp_t group,
  ext2fs_block_bitmap bmap)
 {
errcode_t   retval;
blk_t   group_blk, start_blk, last_blk, new_blk, blk;
-   int j;
+   dgrp_t  last_grp;
+   int j, rem_grps, flexbg_size = 0;
 
group_blk = ext2fs_group_first_block(fs, group);
last_blk = ext2fs_group_last_block(fs, group);
 
if (!bmap)
bmap = fs-block_map;
+
+   if (EXT2_HAS_INCOMPAT_FEATURE (fs-super,
+  EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+   flexbg_size = 1  fs-super-s_log_groups_per_flex;
+   rem_grps = flexbg_size - (group % flexbg_size);
+   last_grp = group + rem_grps - 1;
+   if (last_grp  fs-group_desc_count)
+   last_grp = fs-group_desc_count;
+   }

/*
 * Allocate the block and inode bitmaps, if necessary
@@ -56,6 +120,15 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
} else
start_blk = group_blk;
 
+   if (flexbg_size) {
+   int prev_block = 0;
+   if (group  fs-group_desc[group-1].bg_block_bitmap)
+   prev_block = fs-group_desc[group-1].bg_block_bitmap;
+   start_blk = ext2fs_flexbg_offset(fs, group, prev_block, bmap,
+0, rem_grps);
+   last_blk = ext2fs_group_last_block(fs, last_grp);
+   }
+
if (!fs-group_desc[group].bg_block_bitmap) {
retval = ext2fs_get_free_blocks(fs, start_blk, last_blk,
1, bmap, new_blk);
@@ -66,6 +139,21 @@ errcode_t ext2fs_allocate_group_table(ext2_filsys fs, 
dgrp_t group,
return 

Re: [PATCH] allocate struct ext4_allocation_context from a kmem cache to save stack space

2008-02-07 Thread Mingming Cao
On Wed, 2008-02-06 at 11:05 -0600, Eric Sandeen wrote:
 struct ext4_allocation_context is rather large, and this bloats
 the stack of many functions which use it.  Allocating it from
 a named slab cache will alleviate this.
 
 For example, with this change (on top of the noinline patch sent earlier):
 
 -ext4_mb_new_blocks   200
 +ext4_mb_new_blocks40
 
 -ext4_mb_free_blocks  344
 +ext4_mb_free_blocks  168
 
 -ext4_mb_release_inode_pa 216
 +ext4_mb_release_inode_pa  40
 
 -ext4_mb_release_group_pa 192
 +ext4_mb_release_group_pa  24
 
 Most of these stack-allocated structs are actually used only for
 mballoc history; and in those cases often a smaller struct would do.
 So changing that may be another way around it, at least for those
 functions, if preferred.  For now, in those cases where the ac
 is only for history, an allocation failure simply skips the history
 recording, and does not cause any other failures.
 
 Comments?
 
 Signed-off-by: Eric Sandeen [EMAIL PROTECTED]
 ---
 
 Index: linux-2.6.24-rc6-mm1/fs/ext4/mballoc.c
 ===
 --- linux-2.6.24-rc6-mm1.orig/fs/ext4/mballoc.c
 +++ linux-2.6.24-rc6-mm1/fs/ext4/mballoc.c
 @@ -419,6 +419,7 @@
  #define MB_DEFAULT_GROUP_PREALLOC512
 
  static struct kmem_cache *ext4_pspace_cachep;
 +static struct kmem_cache *ext4_ac_cachep;
 
  #ifdef EXT4_BB_MAX_BLOCKS
  #undef EXT4_BB_MAX_BLOCKS
 @@ -2958,11 +2959,18 @@ int __init init_ext4_mballoc(void)
   if (ext4_pspace_cachep == NULL)
   return -ENOMEM;
 
 -#ifdef CONFIG_PROC_FS
 + ext4_ac_cachep =
 + kmem_cache_create(ext4_alloc_context,
 +  sizeof(struct ext4_allocation_context),
 +  0, SLAB_RECLAIM_ACCOUNT, NULL);
 + if (ext4_ac_cachep == NULL) {
 + kmem_cache_destroy(ext4_pspace_cachep);
 + return -ENOMEM;
 + }
 +
   proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs);
   if (proc_root_ext4 == NULL)
   printk(KERN_ERR EXT4-fs: Unable to create %s\n, EXT4_ROOT);
 -#endif
 
   return 0;
  }
 @@ -2971,9 +2979,8 @@ void exit_ext4_mballoc(void)
  {
   /* XXX: synchronize_rcu(); */
   kmem_cache_destroy(ext4_pspace_cachep);
 -#ifdef CONFIG_PROC_FS
 + kmem_cache_destroy(ext4_ac_cachep);
   remove_proc_entry(EXT4_ROOT, proc_root_fs);
 -#endif
  }
 
 

Do you intend to remove the #ifdef CONFIG_PROC_FS, or it's a accident? I
think we need keep that to allow ext4 build without procfs configured.

Other than this, the patch looks fine to me.:)

Mingming

-
To unsubscribe from this list: send the line unsubscribe linux-ext4 in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] allocate struct ext4_allocation_context from a kmem cache to save stack space

2008-02-07 Thread Eric Sandeen
Mingming Cao wrote:

 Do you intend to remove the #ifdef CONFIG_PROC_FS, or it's a accident? I
 think we need keep that to allow ext4 build without procfs configured.
 
 Other than this, the patch looks fine to me.:)

oh, it kind of snuck in.  It actually should still build, as
remove_proc_entry is a no-op function w/o the config option.

Feel free to leave it in if you like though.

-Eric
-
To unsubscribe from this list: send the line unsubscribe linux-ext4 in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BUG_ON at mballoc.c:3752

2008-02-07 Thread Mingming Cao
On Thu, 2008-02-07 at 18:25 +0530, Aneesh Kumar K.V wrote:

 ext4: Don't panic in case of corrupt bitmap
 
 From: Aneesh Kumar K.V [EMAIL PROTECTED]
 
 Multiblock allocator was calling BUG_ON in many case if the free and used
 blocks count obtained looking at the bitmap is different from what
 the allocator internally accounted for. Use ext4_error in such case
 and don't panic the system.
 

There seems a lot of BUG_ON() and BUG() in mballoc code, other than this
case. Should it always panic the whole system in those cases? Perhaps
replacing with ext4_error() or some cases just WARN_ON is enough.

Mingming
 Signed-off-by: Aneesh Kumar K.V [EMAIL PROTECTED]
 ---
 
  fs/ext4/mballoc.c |   35 +--
  1 files changed, 21 insertions(+), 14 deletions(-)
 
 
 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
 index 06d1f52..656729b 100644
 --- a/fs/ext4/mballoc.c
 +++ b/fs/ext4/mballoc.c
 @@ -680,7 +680,6 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int 
 order, int *max)
  {
   char *bb;
 
 - /* FIXME!! is this needed */
   BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
   BUG_ON(max == NULL);
 



 @@ -964,7 +963,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
   grp-bb_fragments = fragments;
 
   if (free != grp-bb_free) {
 - printk(KERN_DEBUG
 + ext4_error(sb, __FUNCTION__,
   EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n,
   group, free, grp-bb_free);
   grp-bb_free = free;
 @@ -1821,13 +1820,24 @@ static void ext4_mb_complex_scan_group(struct 
 ext4_allocation_context *ac,
   i = ext4_find_next_zero_bit(bitmap,
   EXT4_BLOCKS_PER_GROUP(sb), i);
   if (i = EXT4_BLOCKS_PER_GROUP(sb)) {
 - BUG_ON(free != 0);
 + /*
 +  * IF we corrupt the bitmap  we won't find any
 +  * free blocks even though group info says we
 +  * we have free blocks
 +  */
 + ext4_error(sb, __FUNCTION__, %d free blocks as per 
 + group info. But bitmap says 0\n,
 + free);
   break;
   }
 
   mb_find_extent(e4b, 0, i, ac-ac_g_ex.fe_len, ex);
   BUG_ON(ex.fe_len = 0);
 - BUG_ON(free  ex.fe_len);
 + if (free  ex.fe_len) {
 + ext4_error(sb, __FUNCTION__, %d free blocks as per 
 + group info. But got %d blocks\n,
 + free, ex.fe_len);
 + }
 
   ext4_mb_measure_extent(ac, ex, e4b);
 
 @@ -3354,13 +3364,10 @@ static void ext4_mb_use_group_pa(struct 
 ext4_allocation_context *ac,
   ac-ac_pa = pa;
 
   /* we don't correct pa_pstart or pa_plen here to avoid
 -  * possible race when tte group is being loaded concurrently
 +  * possible race when the group is being loaded concurrently
* instead we correct pa later, after blocks are marked
 -  * in on-disk bitmap -- see ext4_mb_release_context() */
 - /*
 -  * FIXME!! but the other CPUs can look at this particular
 -  * pa and think that it have enought free blocks if we
 -  * don't update pa_free here right ?
 +  * in on-disk bitmap -- see ext4_mb_release_context()
 +  * Other CPUs are prevented from allocating from this pa by lg_mutex
*/
   mb_debug(use %u/%u from group pa %p\n, pa-pa_lstart-len, len, pa);
  }
 @@ -3743,13 +3750,13 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy 
 *e4b,
   bit = next + 1;
   }
   if (free != pa-pa_free) {
 - printk(KERN_ERR pa %p: logic %lu, phys. %lu, len %lu\n,
 + printk(KERN_CRIT pa %p: logic %lu, phys. %lu, len %lu\n,
   pa, (unsigned long) pa-pa_lstart,
   (unsigned long) pa-pa_pstart,
   (unsigned long) pa-pa_len);
 - printk(KERN_ERR free %u, pa_free %u\n, free, pa-pa_free);
 + ext4_error(sb, __FUNCTION__, free %u, pa_free %u\n,
 + free, pa-pa_free);
   }
 - BUG_ON(free != pa-pa_free);
   atomic_add(free, sbi-s_mb_discarded);
 
   return err;
 @@ -4405,7 +4412,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode 
 *inode,
   unsigned long block, unsigned long count,
   int metadata, unsigned long *freed)
  {
 - struct buffer_head *bitmap_bh = 0;
 + struct buffer_head *bitmap_bh = NULL;
   struct super_block *sb = inode-i_sb;
   struct ext4_allocation_context ac;
   struct ext4_group_desc *gdp;
 -
 To unsubscribe from this list: send the line unsubscribe linux-ext4 in
 the body of a message to [EMAIL 

Re: [PATCH] allocate struct ext4_allocation_context from a kmem cache to save stack space

2008-02-07 Thread Mingming Cao
On Thu, 2008-02-07 at 19:06 -0600, Eric Sandeen wrote:
 Mingming Cao wrote:
 
  Do you intend to remove the #ifdef CONFIG_PROC_FS, or it's a accident? I
  think we need keep that to allow ext4 build without procfs configured.
  
  Other than this, the patch looks fine to me.:)
 
 oh, it kind of snuck in.  It actually should still build, as
 remove_proc_entry is a no-op function w/o the config option.

Oh, I mean the proc_mkdir(EXT4_ROOT, proc_root_fs) will complain w/o
CONFIG_PROC_FS configured.

Mingming

-
To unsubscribe from this list: send the line unsubscribe linux-ext4 in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] allocate struct ext4_allocation_context from a kmem cache to save stack space

2008-02-07 Thread Eric Sandeen
Mingming Cao wrote:
 On Thu, 2008-02-07 at 19:06 -0600, Eric Sandeen wrote:
 Mingming Cao wrote:

 Do you intend to remove the #ifdef CONFIG_PROC_FS, or it's a accident? I
 think we need keep that to allow ext4 build without procfs configured.

 Other than this, the patch looks fine to me.:)
 oh, it kind of snuck in.  It actually should still build, as
 remove_proc_entry is a no-op function w/o the config option.
 
 Oh, I mean the proc_mkdir(EXT4_ROOT, proc_root_fs) will complain w/o
 CONFIG_PROC_FS configured.
 
 Mingming
 

it'll build:

static inline struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent) {return NULL;}

yes, it'll issue a printk though.  *shrug*

I like fewer #ifdefs better than more, but doesn't matter much to me.

-Eric
-
To unsubscribe from this list: send the line unsubscribe linux-ext4 in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Bugme-new] [Bug 9911] New: fsync blocks concurrent writes to file

2008-02-07 Thread Andrew Morton
On Thu,  7 Feb 2008 17:40:57 -0800 (PST) [EMAIL PROTECTED] wrote:

 http://bugzilla.kernel.org/show_bug.cgi?id=9911
 
Summary: fsync blocks concurrent writes to file
Product: File System
Version: 2.5
  KernelVersion: 2.6.23.8
   Platform: All
 OS/Version: Linux
   Tree: Mainline
 Status: NEW
   Severity: normal
   Priority: P1
  Component: ext3
 AssignedTo: [EMAIL PROTECTED]
 ReportedBy: [EMAIL PROTECTED]
 
 
 Latest working kernel version: none
 Earliest failing kernel version: 2.6.18
 Distribution: RHEL 4  Fedora 8
 Hardware Environment: Dual Xeon  dual-core Athlon
 Software Environment: Java 1.5.0_14
 Problem Description: Multiple threads append transactions to a single file.
 When fsyncs are issued, no writes complete until the fsync completes. This has
 the effect of yielding the same write rate regardless of how many threads are
 writing to the file. Solaris 10 does not exhibit this problem and scales well
 as additional threads are added.
 
 I have confirmed this by running strace and witnessing that writes block until
 immediately after the fsync completes.
 
 When using truss on Solaris I have witnessed multiple writes completing while
 an fsync was in progress.
 
 Steps to reproduce:
 
 Open a file for writing.
 Launch multiple threads with each one writing data and followed by an fsync,
 but only if data has been written since the last fsync.
 
 
 I have googled this issue substantially and have found no answers.
 

I supposing teaching java about sync_file_range() would be all too hard.

-
To unsubscribe from this list: send the line unsubscribe linux-ext4 in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: BUG_ON at mballoc.c:3752

2008-02-07 Thread Eric Sandeen
Aneesh Kumar K.V wrote:
 On Thu, Feb 07, 2008 at 05:30:48PM -0800, Mingming Cao wrote:
 On Thu, 2008-02-07 at 18:25 +0530, Aneesh Kumar K.V wrote:

 ext4: Don't panic in case of corrupt bitmap

 From: Aneesh Kumar K.V [EMAIL PROTECTED]

 Multiblock allocator was calling BUG_ON in many case if the free and used
 blocks count obtained looking at the bitmap is different from what
 the allocator internally accounted for. Use ext4_error in such case
 and don't panic the system.

 There seems a lot of BUG_ON() and BUG() in mballoc code, other than this
 case. Should it always panic the whole system in those cases? Perhaps
 replacing with ext4_error() or some cases just WARN_ON is enough.

 
 I had looked at the BUG_ON in mballoc code and found them very useful
 while stabilizing the mballoc code. It helped to catch wrong usage of
 functions. Most of the BUG_ON are there to make sure we call the API
 with the lock held or the API should not return value greater than 'x'
 Should not call the function with a particular argument as NULL ...etc
 kind of thing. So i would suggest to keep them as such.

Yep - as long as the BUG_ONs are for programming errors, and not things
like memory allocation failures or corrupted disks, I think it's ok.
Not that I've audited them all.  :)

I asked about this early on, and got that answer... I'm reasonably
satisfied with it.

-Eric
-
To unsubscribe from this list: send the line unsubscribe linux-ext4 in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html