[27/50] ext34: ensure do_split leaves enough free space in both blocks

2007-09-24 Thread Greg KH
From: Eric Sandeen <[EMAIL PROTECTED]>

commit ef2b02d3e617cb0400eedf2668f86215e1b0e6af in mainline.

The do_split() function for htree dir blocks is intended to split a leaf
block to make room for a new entry.  It sorts the entries in the original
block by hash value, then moves the last half of the entries to the new
block - without accounting for how much space this actually moves.  (IOW,
it moves half of the entry *count* not half of the entry *space*).  If by
chance we have both large & small entries, and we move only the smallest
entries, and we have a large new entry to insert, we may not have created
enough space for it.

The patch below stores each record size when calculating the dx_map, and
then walks the hash-sorted dx_map, calculating how many entries must be
moved to more evenly split the existing entries between the old block and
the new block, guaranteeing enough space for the new entry.

The dx_map "offs" member is reduced to u16 so that the overall map size
does not change - it is temporarily stored at the end of the new block, and
if it grows too large it may be overwritten.  By making offs and size both
u16, we won't grow the map size.

Also add a few comments to the functions involved.

This fixes the testcase reported by [EMAIL PROTECTED] on the
linux-ext4 list, "ext3 dir_index causes an error"

Thanks to Andreas Dilger for discussing the problem & solution with me.

Signed-off-by: Eric Sandeen <[EMAIL PROTECTED]>
Signed-off-by: Andreas Dilger <[EMAIL PROTECTED]>
Tested-by: Junjiro Okajima <[EMAIL PROTECTED]>
Cc: Theodore Ts'o <[EMAIL PROTECTED]>
Cc: ext4 
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>
Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]>

---
 fs/ext3/namei.c |   39 +++
 fs/ext4/namei.c |   39 +++
 2 files changed, 70 insertions(+), 8 deletions(-)

--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
 struct dx_map_entry
 {
u32 hash;
-   u32 offs;
+   u16 offs;
+   u16 size;
 };
 
 #ifdef CONFIG_EXT3_INDEX
@@ -671,6 +672,10 @@ errout:
  * Directory block splitting, compacting
  */
 
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
 static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry 
*map_tail)
 {
@@ -684,7 +689,8 @@ static int dx_make_map (struct ext3_dir_
ext3fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
-   map_tail->offs = (u32) ((char *) de - base);
+   map_tail->offs = (u16) ((char *) de - base);
+   map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +700,7 @@ static int dx_make_map (struct ext3_dir_
return count;
 }
 
+/* Sort map by hash value */
 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
 {
 struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1081,6 +1088,10 @@ static inline void ext3_set_de_type(stru
 }
 
 #ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
 static struct ext3_dir_entry_2 *
 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
 {
@@ -1099,6 +1110,10 @@ dx_move_dirents(char *from, char *to, st
return (struct ext3_dir_entry_2 *) (to - rec_len);
 }
 
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
 static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
 {
struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct 
ext3_dir_entry_2 *) base;
@@ -1121,6 +1136,11 @@ static struct ext3_dir_entry_2* dx_pack_
return prev;
 }
 
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally 
full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
 static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1132,7 +1152,7 @@ static struct ext3_dir_entry_2 *do_split
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
-   unsigned split;
+   unsigned split, move, size, i;
struct ext3_dir_entry_2 *de = NULL, *de2;
int err = 0;
 
@@ -1160,8 +1180,19 @@ static struct ext3_dir_entry_2 *do_split
count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
 bloc

[2.6.22.2 review 59/84] jbd2 commit: fix transaction dropping

2007-08-07 Thread Greg KH

From: Jan Kara <[EMAIL PROTECTED]>

We have to check that also the second checkpoint list is non-empty before
dropping the transaction.

Signed-off-by: Jan Kara <[EMAIL PROTECTED]>
Cc: Chuck Ebbert <[EMAIL PROTECTED]>
Cc: Kirill Korotaev <[EMAIL PROTECTED]>
Cc: 
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]>

---
 fs/jbd2/commit.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -896,7 +896,8 @@ restart_loop:
journal->j_committing_transaction = NULL;
spin_unlock(&journal->j_state_lock);
 
-   if (commit_transaction->t_checkpoint_list == NULL) {
+   if (commit_transaction->t_checkpoint_list == NULL &&
+   commit_transaction->t_checkpoint_io_list == NULL) {
__jbd2_journal_drop_transaction(journal, commit_transaction);
} else {
if (journal->j_checkpoint_transactions == NULL) {

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[2.6.22.2 review 58/84] jbd commit: fix transaction dropping

2007-08-07 Thread Greg KH

From: Jan Kara <[EMAIL PROTECTED]>

We have to check that also the second checkpoint list is non-empty before
dropping the transaction.

Signed-off-by: Jan Kara <[EMAIL PROTECTED]>
Cc: Chuck Ebbert <[EMAIL PROTECTED]>
Cc: Kirill Korotaev <[EMAIL PROTECTED]>
Cc: 
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]>

---
 fs/jbd/commit.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -887,7 +887,8 @@ restart_loop:
journal->j_committing_transaction = NULL;
spin_unlock(&journal->j_state_lock);
 
-   if (commit_transaction->t_checkpoint_list == NULL) {
+   if (commit_transaction->t_checkpoint_list == NULL &&
+   commit_transaction->t_checkpoint_io_list == NULL) {
__journal_drop_transaction(journal, commit_transaction);
} else {
if (journal->j_checkpoint_transactions == NULL) {

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[2.6.22.2 review 43/84] "ext4_ext_put_in_cache" uses __u32 to receive physical block number

2007-08-07 Thread Greg KH

From: Mingming Cao <[EMAIL PROTECTED]>

Yan Zheng wrote:

> I think I found a bug in ext4/extents.c, "ext4_ext_put_in_cache" uses
> "__u32" to receive physical block number.  "ext4_ext_put_in_cache" is
> used in "ext4_ext_get_blocks", it sets ext4 inode's extent cache
> according most recently tree lookup (higher 16 bits of saved physical
> block number are always zero). when serving a mapping request,
> "ext4_ext_get_blocks" first check whether the logical block is in
> inode's extent cache. if the logical block is in the cache and the
> cached region isn't a gap, "ext4_ext_get_blocks" gets physical block
> number by using cached region's physical block number and offset in
> the cached region.  as described above, "ext4_ext_get_blocks" may
> return wrong result when there are physical block numbers bigger than
> 0x.
>

You are right.  Thanks for reporting this!

Signed-off-by: Mingming Cao <[EMAIL PROTECTED]>
Cc: Yan Zheng <[EMAIL PROTECTED]>
Cc: 
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]>

---
 fs/ext4/extents.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1445,7 +1445,7 @@ int ext4_ext_walk_space(struct inode *in
 
 static void
 ext4_ext_put_in_cache(struct inode *inode, __u32 block,
-   __u32 len, __u32 start, int type)
+   __u32 len, ext4_fsblk_t start, int type)
 {
struct ext4_ext_cache *cex;
BUG_ON(len == 0);

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATHC] Fix for ext2 reservation (Re: -mm merge plans for 2.6.23)

2007-07-10 Thread Greg KH
On Tue, Jul 10, 2007 at 05:04:17PM -0700, Badari Pulavarty wrote:
> On Tue, 2007-07-10 at 16:55 -0700, Andrew Morton wrote:
> > On Tue, 10 Jul 2007 15:15:57 -0700
> > Badari Pulavarty <[EMAIL PROTECTED]> wrote:
> > 
> > > Well, I looked at the problem now and here is the fix :)
> > 
> > whee, thanks.
> > 
> > > Greg, Please consider this for stable release also.
> > 
> > No, it is only relevant to -mm's ext2-reservations.patch.
> 
> Yes. Sorry. I was looking at -mm tree and assumed that
> ext2-reservation code made into mainline also :(
> 
> Greg, please ignore the patch for stable.

Ok, dropped from my -stable queue mbox.

thanks,

greg k-h
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 28/31] revert "retries in ext3_prepare_write() violate ordering requirements"

2007-04-11 Thread Greg KH
-stable review patch.  If anyone has any objections, please let us know.

--

From: Andrew Morton <[EMAIL PROTECTED]>

Revert e92a4d595b464c4aae64be39ca61a9ffe9c8b278.

Dmitry points out

"When we block_prepare_write() failed while ext3_prepare_write() we jump to
 "failure" label and call ext3_prepare_failure() witch search last mapped bh
 and invoke commit_write untill it.  This is wrong!!  because some bh from
 begining to the last mapped bh may be not uptodate.  As a result we commit to
 disk not uptodate page content witch contains garbage from previous usage."

and

"Unexpected file size increasing."

   Call trace the same as it was in first issue but result is different. 
   For example we have file with i_size is zero.  we want write two blocks ,
   but fs has only one free block.

   ->ext3_prepare_write(...from == 0, to == 2048)
 retry:
 ->block_prepare_write() == -ENOSPC# we failed but allocated one block here.
 ->ext3_prepare_failure()
   ->commit_write( from == 0, to == 1024) # after this i_size becomes 1024 
:)
 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;

   Finally when all retries will be spended ext3_prepare_failure return
   -ENOSPC, but i_size was increased and later block trimm procedures can't
   help here.

We don't appear to have the horsepower to fix these issues, so let's put
things back the way they were for now.

Cc: Kirill Korotaev <[EMAIL PROTECTED]>
Cc: Ingo Molnar <[EMAIL PROTECTED]>
Cc: Ken Chen <[EMAIL PROTECTED]>
Cc: Andrey Savochkin <[EMAIL PROTECTED]>
Cc: 
Cc: Dmitriy Monakhov <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]>


---
 fs/ext3/inode.c |   85 ++--
 1 file changed, 10 insertions(+), 75 deletions(-)

--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1148,102 +1148,37 @@ static int do_journal_get_write_access(h
return ext3_journal_get_write_access(handle, bh);
 }
 
-/*
- * The idea of this helper function is following:
- * if prepare_write has allocated some blocks, but not all of them, the
- * transaction must include the content of the newly allocated blocks.
- * This content is expected to be set to zeroes by block_prepare_write().
- * 2006/10/14  SAW
- */
-static int ext3_prepare_failure(struct file *file, struct page *page,
-   unsigned from, unsigned to)
-{
-   struct address_space *mapping;
-   struct buffer_head *bh, *head, *next;
-   unsigned block_start, block_end;
-   unsigned blocksize;
-   int ret;
-   handle_t *handle = ext3_journal_current_handle();
-
-   mapping = page->mapping;
-   if (ext3_should_writeback_data(mapping->host)) {
-   /* optimization: no constraints about data */
-skip:
-   return ext3_journal_stop(handle);
-   }
-
-   head = page_buffers(page);
-   blocksize = head->b_size;
-   for (   bh = head, block_start = 0;
-   bh != head || !block_start;
-   block_start = block_end, bh = next)
-   {
-   next = bh->b_this_page;
-   block_end = block_start + blocksize;
-   if (block_end <= from)
-   continue;
-   if (block_start >= to) {
-   block_start = to;
-   break;
-   }
-   if (!buffer_mapped(bh))
-   /* prepare_write failed on this bh */
-   break;
-   if (ext3_should_journal_data(mapping->host)) {
-   ret = do_journal_get_write_access(handle, bh);
-   if (ret) {
-   ext3_journal_stop(handle);
-   return ret;
-   }
-   }
-   /*
-* block_start here becomes the first block where the current iteration
-* of prepare_write failed.
-*/
-   }
-   if (block_start <= from)
-   goto skip;
-
-   /* commit allocated and zeroed buffers */
-   return mapping->a_ops->commit_write(file, page, from, block_start);
-}
-
 static int ext3_prepare_write(struct file *file, struct page *page,
  unsigned from, unsigned to)
 {
struct inode *inode = page->mapping->host;
-   int ret, ret2;
-   int needed_blocks = ext3_writepage_trans_blocks(inode);
+   int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
handle_t *handle;
int retries = 0;
 
 retry:
handle = ext3_journal_start(inode, needed_blocks);
-   if (IS_ERR(handle))
-   return PTR_ERR(handle);
+   if (IS_ERR(handle)) {
+   ret = PTR_ERR(handle);
+   goto out;
+   }
if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
ret = nobh_prepare_write(page, fr

[patch 29/31] revert "retries in ext4_prepare_write() violate ordering requirements"

2007-04-11 Thread Greg KH
-stable review patch.  If anyone has any objections, please let us know.

--

From: Andrew Morton <[EMAIL PROTECTED]>

Revert b46be05004abb419e303e66e143eed9f8a6e9f3f.  Same reasoning as for ext3.

Cc: Kirill Korotaev <[EMAIL PROTECTED]>
Cc: Ingo Molnar <[EMAIL PROTECTED]>
Cc: Ken Chen <[EMAIL PROTECTED]>
Cc: Andrey Savochkin <[EMAIL PROTECTED]>
Cc: 
Cc: Dmitriy Monakhov <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Greg Kroah-Hartman <[EMAIL PROTECTED]>

---
 fs/ext4/inode.c |   85 ++--
 1 file changed, 10 insertions(+), 75 deletions(-)

--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1147,102 +1147,37 @@ static int do_journal_get_write_access(h
return ext4_journal_get_write_access(handle, bh);
 }
 
-/*
- * The idea of this helper function is following:
- * if prepare_write has allocated some blocks, but not all of them, the
- * transaction must include the content of the newly allocated blocks.
- * This content is expected to be set to zeroes by block_prepare_write().
- * 2006/10/14  SAW
- */
-static int ext4_prepare_failure(struct file *file, struct page *page,
-   unsigned from, unsigned to)
-{
-   struct address_space *mapping;
-   struct buffer_head *bh, *head, *next;
-   unsigned block_start, block_end;
-   unsigned blocksize;
-   int ret;
-   handle_t *handle = ext4_journal_current_handle();
-
-   mapping = page->mapping;
-   if (ext4_should_writeback_data(mapping->host)) {
-   /* optimization: no constraints about data */
-skip:
-   return ext4_journal_stop(handle);
-   }
-
-   head = page_buffers(page);
-   blocksize = head->b_size;
-   for (   bh = head, block_start = 0;
-   bh != head || !block_start;
-   block_start = block_end, bh = next)
-   {
-   next = bh->b_this_page;
-   block_end = block_start + blocksize;
-   if (block_end <= from)
-   continue;
-   if (block_start >= to) {
-   block_start = to;
-   break;
-   }
-   if (!buffer_mapped(bh))
-   /* prepare_write failed on this bh */
-   break;
-   if (ext4_should_journal_data(mapping->host)) {
-   ret = do_journal_get_write_access(handle, bh);
-   if (ret) {
-   ext4_journal_stop(handle);
-   return ret;
-   }
-   }
-   /*
-* block_start here becomes the first block where the current iteration
-* of prepare_write failed.
-*/
-   }
-   if (block_start <= from)
-   goto skip;
-
-   /* commit allocated and zeroed buffers */
-   return mapping->a_ops->commit_write(file, page, from, block_start);
-}
-
 static int ext4_prepare_write(struct file *file, struct page *page,
  unsigned from, unsigned to)
 {
struct inode *inode = page->mapping->host;
-   int ret, ret2;
-   int needed_blocks = ext4_writepage_trans_blocks(inode);
+   int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
handle_t *handle;
int retries = 0;
 
 retry:
handle = ext4_journal_start(inode, needed_blocks);
-   if (IS_ERR(handle))
-   return PTR_ERR(handle);
+   if (IS_ERR(handle)) {
+   ret = PTR_ERR(handle);
+   goto out;
+   }
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
ret = nobh_prepare_write(page, from, to, ext4_get_block);
else
ret = block_prepare_write(page, from, to, ext4_get_block);
if (ret)
-   goto failure;
+   goto prepare_write_failed;
 
if (ext4_should_journal_data(inode)) {
ret = walk_page_buffers(handle, page_buffers(page),
from, to, NULL, do_journal_get_write_access);
-   if (ret)
-   /* fatal error, just put the handle and return */
-   ext4_journal_stop(handle);
}
-   return ret;
-
-failure:
-   ret2 = ext4_prepare_failure(file, page, from, to);
-   if (ret2 < 0)
-   return ret2;
+prepare_write_failed:
+   if (ret)
+   ext4_journal_stop(handle);
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
-   /* retry number exceeded, or other error like -EDQUOT */
+out:
return ret;
 }
 

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html