Re: [PATCH 2/2] Btrfs: fix memory leak of pending_snapshot-inherit
On 02/07/13 07:02, Miao Xie wrote: The argument inherit of btrfs_ioctl_snap_create_transid() was assigned to NULL during we created the snapshots, so we didn't free it though we called kfree() in the caller. But since we are sure the snapshot creation is done after the function - btrfs_ioctl_snap_create_transid() - completes, it is safe that we don't assign the pointer inherit to NULL, and just free it in the caller of btrfs_ioctl_snap_create_transid(). In this way, the code can become more readable. NAK. The snapshot creation is triggered from btrfs_commit_transaction, I don't want to implicitly rely on commit_transaction being called for each snapshot created. I'm not even sure the async path really commits the transaction. The responsibility for the creation is passed to the pending_snapshot data structure, and so should the responsibility for the inherit struct. -Arne Reported-by: Alex Lyakas alex.bt...@zadarastorage.com Cc: Arne Jansen sensi...@gmx.net Signed-off-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/ioctl.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02d3035..40f2fbf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root, if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, -inherit ? *inherit : NULL); + ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, inherit); if (ret) goto fail; @@ -530,7 +529,7 @@ fail: static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, -bool readonly, struct btrfs_qgroup_inherit **inherit) +bool readonly, struct btrfs_qgroup_inherit *inherit) { struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; @@ -549,10 +548,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot-dentry = dentry; pending_snapshot-root = root; pending_snapshot-readonly = readonly; - if (inherit) { - pending_snapshot-inherit = *inherit; - *inherit = NULL;/* take responsibility to free it */ - } + pending_snapshot-inherit = inherit; trans = btrfs_start_transaction(root-fs_info-extent_root, 6); if (IS_ERR(trans)) { @@ -692,7 +688,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, u64 *async_transid, bool readonly, -struct btrfs_qgroup_inherit **inherit) +struct btrfs_qgroup_inherit *inherit) { struct inode *dir = parent-dentry-d_inode; struct dentry *dentry; @@ -1454,7 +1450,7 @@ out: static noinline int btrfs_ioctl_snap_create_transid(struct file *file, char *name, unsigned long fd, int subvol, u64 *transid, bool readonly, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { int namelen; int ret = 0; @@ -1563,7 +1559,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = btrfs_ioctl_snap_create_transid(file, vol_args-name, vol_args-fd, subvol, ptr, - readonly, inherit); + readonly, inherit); if (ret == 0 ptr copy_to_user(arg + -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] Btrfs: fix memory leak of pending_snapshot-inherit
Arne, Miao, I also agree that it is better to move this responsibility to create_pending_snapshot(). Alex. On Thu, Feb 7, 2013 at 10:43 AM, Arne Jansen sensi...@gmx.net wrote: On 02/07/13 07:02, Miao Xie wrote: The argument inherit of btrfs_ioctl_snap_create_transid() was assigned to NULL during we created the snapshots, so we didn't free it though we called kfree() in the caller. But since we are sure the snapshot creation is done after the function - btrfs_ioctl_snap_create_transid() - completes, it is safe that we don't assign the pointer inherit to NULL, and just free it in the caller of btrfs_ioctl_snap_create_transid(). In this way, the code can become more readable. NAK. The snapshot creation is triggered from btrfs_commit_transaction, I don't want to implicitly rely on commit_transaction being called for each snapshot created. I'm not even sure the async path really commits the transaction. The responsibility for the creation is passed to the pending_snapshot data structure, and so should the responsibility for the inherit struct. -Arne Reported-by: Alex Lyakas alex.bt...@zadarastorage.com Cc: Arne Jansen sensi...@gmx.net Signed-off-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/ioctl.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02d3035..40f2fbf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root, if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, -inherit ? *inherit : NULL); + ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, inherit); if (ret) goto fail; @@ -530,7 +529,7 @@ fail: static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, -bool readonly, struct btrfs_qgroup_inherit **inherit) +bool readonly, struct btrfs_qgroup_inherit *inherit) { struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; @@ -549,10 +548,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot-dentry = dentry; pending_snapshot-root = root; pending_snapshot-readonly = readonly; - if (inherit) { - pending_snapshot-inherit = *inherit; - *inherit = NULL;/* take responsibility to free it */ - } + pending_snapshot-inherit = inherit; trans = btrfs_start_transaction(root-fs_info-extent_root, 6); if (IS_ERR(trans)) { @@ -692,7 +688,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, u64 *async_transid, bool readonly, -struct btrfs_qgroup_inherit **inherit) +struct btrfs_qgroup_inherit *inherit) { struct inode *dir = parent-dentry-d_inode; struct dentry *dentry; @@ -1454,7 +1450,7 @@ out: static noinline int btrfs_ioctl_snap_create_transid(struct file *file, char *name, unsigned long fd, int subvol, u64 *transid, bool readonly, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { int namelen; int ret = 0; @@ -1563,7 +1559,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = btrfs_ioctl_snap_create_transid(file, vol_args-name, vol_args-fd, subvol, ptr, - readonly, inherit); + readonly, inherit); if (ret == 0 ptr copy_to_user(arg + -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] Btrfs: fix memory leak of pending_snapshot-inherit
On Thu, 07 Feb 2013 09:43:47 +0100, Arne Jansen wrote: On 02/07/13 07:02, Miao Xie wrote: The argument inherit of btrfs_ioctl_snap_create_transid() was assigned to NULL during we created the snapshots, so we didn't free it though we called kfree() in the caller. But since we are sure the snapshot creation is done after the function - btrfs_ioctl_snap_create_transid() - completes, it is safe that we don't assign the pointer inherit to NULL, and just free it in the caller of btrfs_ioctl_snap_create_transid(). In this way, the code can become more readable. NAK. The snapshot creation is triggered from btrfs_commit_transaction, I don't want to implicitly rely on commit_transaction being called for each snapshot created. I'm not even sure the async path really commits the transaction. The responsibility for the creation is passed to the pending_snapshot data structure, and so should the responsibility for the inherit struct. I don't agree with you. We are sure the async path really commits the transaction because we pass 1 as the value of the third argument into btrfs_commit_transaction_async(). It means we must wait for the completion of the current transaction. So Freeing the inherit struct in the caller is safe. Besides that, the pending_snapshot data structure is also allocated and freed by the same function in fact, why not use this style for the inherit struct. I think it is more readable. Assigning a pointer to be NULL and freeing it in the caller is very strange for the people who reads the code. (It is also the reason why I made the mistake at the beginning.) So I think my patch is reasonable. Thanks Miao -Arne Reported-by: Alex Lyakas alex.bt...@zadarastorage.com Cc: Arne Jansen sensi...@gmx.net Signed-off-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/ioctl.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02d3035..40f2fbf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root, if (IS_ERR(trans)) return PTR_ERR(trans); -ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, - inherit ? *inherit : NULL); +ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, inherit); if (ret) goto fail; @@ -530,7 +529,7 @@ fail: static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - bool readonly, struct btrfs_qgroup_inherit **inherit) + bool readonly, struct btrfs_qgroup_inherit *inherit) { struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; @@ -549,10 +548,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot-dentry = dentry; pending_snapshot-root = root; pending_snapshot-readonly = readonly; -if (inherit) { -pending_snapshot-inherit = *inherit; -*inherit = NULL;/* take responsibility to free it */ -} +pending_snapshot-inherit = inherit; trans = btrfs_start_transaction(root-fs_info-extent_root, 6); if (IS_ERR(trans)) { @@ -692,7 +688,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, u64 *async_transid, bool readonly, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { struct inode *dir = parent-dentry-d_inode; struct dentry *dentry; @@ -1454,7 +1450,7 @@ out: static noinline int btrfs_ioctl_snap_create_transid(struct file *file, char *name, unsigned long fd, int subvol, u64 *transid, bool readonly, -struct btrfs_qgroup_inherit **inherit) +struct btrfs_qgroup_inherit *inherit) { int namelen; int ret = 0; @@ -1563,7 +1559,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = btrfs_ioctl_snap_create_transid(file, vol_args-name, vol_args-fd, subvol, ptr, -
Re: [PATCH 2/2] Btrfs: fix memory leak of pending_snapshot-inherit
On 02/07/13 10:28, Miao Xie wrote: On Thu, 07 Feb 2013 09:43:47 +0100, Arne Jansen wrote: On 02/07/13 07:02, Miao Xie wrote: The argument inherit of btrfs_ioctl_snap_create_transid() was assigned to NULL during we created the snapshots, so we didn't free it though we called kfree() in the caller. But since we are sure the snapshot creation is done after the function - btrfs_ioctl_snap_create_transid() - completes, it is safe that we don't assign the pointer inherit to NULL, and just free it in the caller of btrfs_ioctl_snap_create_transid(). In this way, the code can become more readable. NAK. The snapshot creation is triggered from btrfs_commit_transaction, I don't want to implicitly rely on commit_transaction being called for each snapshot created. I'm not even sure the async path really commits the transaction. The responsibility for the creation is passed to the pending_snapshot data structure, and so should the responsibility for the inherit struct. I don't agree with you. We are sure the async path really commits the transaction because we pass 1 as the value of the third argument into btrfs_commit_transaction_async(). It means we must wait for the completion of the current transaction. So Freeing the inherit struct in the caller is safe. I see your point. But speaking of readability, I have to trace quite a lot of functions to see that even the async path waits for the snapshot to be created. Which makes the name 'async' sort of pointless. So from what I've read so far I _think_ your patch does the right thing. Thanks for clearing that up. -Arne Besides that, the pending_snapshot data structure is also allocated and freed by the same function in fact, why not use this style for the inherit struct. I think it is more readable. Assigning a pointer to be NULL and freeing it in the caller is very strange for the people who reads the code. (It is also the reason why I made the mistake at the beginning.) So I think my patch is reasonable. Thanks Miao -Arne Reported-by: Alex Lyakas alex.bt...@zadarastorage.com Cc: Arne Jansen sensi...@gmx.net Signed-off-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/ioctl.c | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02d3035..40f2fbf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { struct btrfs_trans_handle *trans; struct btrfs_key key; @@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root, if (IS_ERR(trans)) return PTR_ERR(trans); - ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, - inherit ? *inherit : NULL); + ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, inherit); if (ret) goto fail; @@ -530,7 +529,7 @@ fail: static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, u64 *async_transid, - bool readonly, struct btrfs_qgroup_inherit **inherit) + bool readonly, struct btrfs_qgroup_inherit *inherit) { struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; @@ -549,10 +548,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot-dentry = dentry; pending_snapshot-root = root; pending_snapshot-readonly = readonly; - if (inherit) { - pending_snapshot-inherit = *inherit; - *inherit = NULL;/* take responsibility to free it */ - } + pending_snapshot-inherit = inherit; trans = btrfs_start_transaction(root-fs_info-extent_root, 6); if (IS_ERR(trans)) { @@ -692,7 +688,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, int namelen, struct btrfs_root *snap_src, u64 *async_transid, bool readonly, - struct btrfs_qgroup_inherit **inherit) + struct btrfs_qgroup_inherit *inherit) { struct inode *dir = parent-dentry-d_inode; struct dentry *dentry; @@ -1454,7 +1450,7 @@ out: static noinline int btrfs_ioctl_snap_create_transid(struct file *file, char *name, unsigned long fd, int subvol, u64 *transid, bool readonly, - struct btrfs_qgroup_inherit **inherit) + struct
[RFC][PATCH] Btrfs: fix deadlock due to unsubmitted
The deadlock problem happened when running fsstress(a test program in LTP). Steps to reproduce: # mkfs.btrfs -b 100M partition # mount partition mnt # Path/fsstress -p 3 -n 1000 -d mnt The reason is: btrfs_direct_IO() |-do_direct_IO() |-get_page() |-get_blocks() | |-btrfs_delalloc_resereve_space() | |-btrfs_add_ordered_extent() --- Add a new ordered extent |-dio_send_cur_page(page0) -- We didn't submit bio here |-get_page() |-get_blocks() |-btrfs_delalloc_resereve_space() |-flush_space() |-btrfs_start_ordered_extent() |-wait_event() -- Wait the completion of the ordered extent that is mentioned above But because we didn't submit the bio that is mentioned above, the ordered extent can not complete, we would wait for its completion forever. There are two methods which can fix this deadlock problem: 1. submit the bio before we invoke get_blocks() 2. reserve the space before we do dio Though the 1st is the simplest way, we need modify the code of VFS, and it is likely to break contiguous requests, and introduce performance regression for the other filesystems. So we have to choose the 2nd way. Signed-off-by: Miao Xie mi...@cn.fujitsu.com Cc: Josef Bacik jba...@fusionio.com --- fs/btrfs/extent-tree.c |3 +- fs/btrfs/inode.c | 81 --- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 85b8454..ca9afc4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4670,7 +4670,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) spin_lock(BTRFS_I(inode)-lock); dropped = drop_outstanding_extent(inode); - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + if (num_bytes) + to_free = calc_csum_metadata_size(inode, num_bytes, 0); spin_unlock(BTRFS_I(inode)-lock); if (dropped 0) to_free += btrfs_calc_trans_metadata_size(root, dropped); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ca7ace7..c5d829d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6004,16 +6004,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 len = bh_result-b_size; struct btrfs_trans_handle *trans; int unlock_bits = EXTENT_LOCKED; - int ret; + int ret = 0; if (create) { - ret = btrfs_delalloc_reserve_space(inode, len); - if (ret) - return ret; + spin_lock(BTRFS_I(inode)-lock); + BTRFS_I(inode)-outstanding_extents++; + spin_unlock(BTRFS_I(inode)-lock); unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; - } else { + } else len = min_t(u64, len, root-sectorsize); - } lockstart = start; lockend = start + len - 1; @@ -6025,14 +6024,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (lock_extent_direct(inode, lockstart, lockend, cached_state, create)) return -ENOTBLK; - if (create) { - ret = set_extent_bit(BTRFS_I(inode)-io_tree, lockstart, -lockend, EXTENT_DELALLOC, NULL, -cached_state, GFP_NOFS); - if (ret) - goto unlock_err; - } - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); @@ -6064,7 +6055,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (!create (em-block_start == EXTENT_MAP_HOLE || test_bit(EXTENT_FLAG_PREALLOC, em-flags))) { free_extent_map(em); - ret = 0; goto unlock_err; } @@ -6162,6 +6152,11 @@ unlock: */ if (start + len i_size_read(inode)) i_size_write(inode, start + len); + + ret = set_extent_bit(BTRFS_I(inode)-io_tree, lockstart, +lockstart + len - 1, EXTENT_DELALLOC, NULL, +cached_state, GFP_NOFS); + BUG_ON(ret); } /* @@ -6170,24 +6165,9 @@ unlock: * aren't using if there is any left over space. */ if (lockstart lockend) { - if (create len lockend - lockstart) { - clear_extent_bit(BTRFS_I(inode)-io_tree, lockstart, -lockstart + len - 1, -unlock_bits | EXTENT_DEFRAG, 1, 0, -cached_state,
Re: [RFC][PATCH] Btrfs: fix deadlock due to unsubmitted
On Thu, Feb 07, 2013 at 03:12:07AM -0700, Miao Xie wrote: The deadlock problem happened when running fsstress(a test program in LTP). Steps to reproduce: # mkfs.btrfs -b 100M partition # mount partition mnt # Path/fsstress -p 3 -n 1000 -d mnt The reason is: btrfs_direct_IO() |-do_direct_IO() |-get_page() |-get_blocks() | |-btrfs_delalloc_resereve_space() | |-btrfs_add_ordered_extent() --- Add a new ordered extent |-dio_send_cur_page(page0) -- We didn't submit bio here |-get_page() |-get_blocks() |-btrfs_delalloc_resereve_space() |-flush_space() |-btrfs_start_ordered_extent() |-wait_event() -- Wait the completion of the ordered extent that is mentioned above But because we didn't submit the bio that is mentioned above, the ordered extent can not complete, we would wait for its completion forever. There are two methods which can fix this deadlock problem: 1. submit the bio before we invoke get_blocks() 2. reserve the space before we do dio Though the 1st is the simplest way, we need modify the code of VFS, and it is likely to break contiguous requests, and introduce performance regression for the other filesystems. So we have to choose the 2nd way. The 3rd option is to have get_blocks return -EAGAIN to the direct-io.c code and let the higher levels submit the bios they have built. Josef will probably go for option #4, which is dropping the generic code completely and doing it all ourselves. But I do like your approach, it makes sense here. -chris -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC][PATCH] Btrfs: fix deadlock due to unsubmitted
On Thu, Feb 07, 2013 at 03:12:07AM -0700, Miao Xie wrote: The deadlock problem happened when running fsstress(a test program in LTP). Steps to reproduce: # mkfs.btrfs -b 100M partition # mount partition mnt # Path/fsstress -p 3 -n 1000 -d mnt The reason is: btrfs_direct_IO() |-do_direct_IO() |-get_page() |-get_blocks() | |-btrfs_delalloc_resereve_space() | |-btrfs_add_ordered_extent() --- Add a new ordered extent |-dio_send_cur_page(page0) -- We didn't submit bio here |-get_page() |-get_blocks() |-btrfs_delalloc_resereve_space() |-flush_space() |-btrfs_start_ordered_extent() |-wait_event() -- Wait the completion of the ordered extent that is mentioned above But because we didn't submit the bio that is mentioned above, the ordered extent can not complete, we would wait for its completion forever. There are two methods which can fix this deadlock problem: 1. submit the bio before we invoke get_blocks() 2. reserve the space before we do dio Though the 1st is the simplest way, we need modify the code of VFS, and it is likely to break contiguous requests, and introduce performance regression for the other filesystems. So we have to choose the 2nd way. I ran into this a few weeks ago and as Chris said I opted for option 4, just do all the direct io stuff ourselves and ditch the generic stuff. This approach works for now though so I'm good with it. Signed-off-by: Miao Xie mi...@cn.fujitsu.com Cc: Josef Bacik jba...@fusionio.com --- fs/btrfs/extent-tree.c |3 +- fs/btrfs/inode.c | 81 --- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 85b8454..ca9afc4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4670,7 +4670,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) spin_lock(BTRFS_I(inode)-lock); dropped = drop_outstanding_extent(inode); - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + if (num_bytes) + to_free = calc_csum_metadata_size(inode, num_bytes, 0); spin_unlock(BTRFS_I(inode)-lock); if (dropped 0) to_free += btrfs_calc_trans_metadata_size(root, dropped); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ca7ace7..c5d829d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6004,16 +6004,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 len = bh_result-b_size; struct btrfs_trans_handle *trans; int unlock_bits = EXTENT_LOCKED; - int ret; + int ret = 0; if (create) { - ret = btrfs_delalloc_reserve_space(inode, len); - if (ret) - return ret; + spin_lock(BTRFS_I(inode)-lock); + BTRFS_I(inode)-outstanding_extents++; + spin_unlock(BTRFS_I(inode)-lock); unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; - } else { + } else len = min_t(u64, len, root-sectorsize); - } lockstart = start; lockend = start + len - 1; @@ -6025,14 +6024,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (lock_extent_direct(inode, lockstart, lockend, cached_state, create)) return -ENOTBLK; - if (create) { - ret = set_extent_bit(BTRFS_I(inode)-io_tree, lockstart, - lockend, EXTENT_DELALLOC, NULL, - cached_state, GFP_NOFS); - if (ret) - goto unlock_err; - } - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); @@ -6064,7 +6055,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (!create (em-block_start == EXTENT_MAP_HOLE || test_bit(EXTENT_FLAG_PREALLOC, em-flags))) { free_extent_map(em); - ret = 0; This doesn't look right, this should be left here. goto unlock_err; } @@ -6162,6 +6152,11 @@ unlock: */ if (start + len i_size_read(inode)) i_size_write(inode, start + len); + + ret = set_extent_bit(BTRFS_I(inode)-io_tree, lockstart, + lockstart + len - 1, EXTENT_DELALLOC, NULL, + cached_state, GFP_NOFS); + BUG_ON(ret); Return the error if there is one, there's no reason to add new BUG_ON()'s. Thanks, Josef -- To unsubscribe from this list: send the line
Re: [PATCH 2/2, RFC] btrfs-progs: overhaul mkfs.btrfs -r option
On 2/6/13 6:08 PM, David Sterba wrote: On Tue, Jan 29, 2013 at 02:41:08PM -0600, Eric Sandeen wrote: The manpage for the -r option simply says that it will copy the path specified to -r into the newly made filesystem. There's not a lot of reason to treat that option as differently as it is now - today it ignores discard, fs size, and mixed options, for example. It also failed to check whether the target device was mounted before proceeding. Etc... Rework things so that we really follow the same paths whether or not -r is specified, but with one special case for -r: * If the device does not exist, it will be created as a regular file of the minimum size to hold the -r path, or of size specified by the -b option. This also changes a little behavior; it does not pre-fill the new file with zeros, but allows it to be sparse, and does not truncate an existing device file. If you want to start with an empty file, just don't point it at an existing file... Fixes numerous bugs and I find the changes in behaviour sane and reasonable. A quick test failed for me when the image file does not exist: $ rm image $ ./mkfs.btrfs -r . image ... not enough free space add_file_items failed unable to traverse_directory Making image is aborted. ret = -1 mkfs.btrfs: mkfs.c:1533: main: Assertion `!(ret)' failed. seems that the estimated size is not sufficient. ho hum, thanks for checking. I guess I only tried it on a very small source directory. For now, figuring out btrfs space usage for a given set of files is above my pay grade and experience, I'm afraid. :( -Eric 'du' on the directory (without the image itself) gives me 59M, the image after failed mkfs has 102M, and it's empty when I try to mount it. Using the progs .git directory (18M) as sourcedir also failed, but it worked with man/ subdirectory (94K). david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2, RFC] btrfs-progs: overhaul mkfs.btrfs -r option
On Thu, Feb 07, 2013 at 08:52:43AM -0700, Eric Sandeen wrote: On 2/6/13 6:08 PM, David Sterba wrote: On Tue, Jan 29, 2013 at 02:41:08PM -0600, Eric Sandeen wrote: The manpage for the -r option simply says that it will copy the path specified to -r into the newly made filesystem. There's not a lot of reason to treat that option as differently as it is now - today it ignores discard, fs size, and mixed options, for example. It also failed to check whether the target device was mounted before proceeding. Etc... Rework things so that we really follow the same paths whether or not -r is specified, but with one special case for -r: * If the device does not exist, it will be created as a regular file of the minimum size to hold the -r path, or of size specified by the -b option. This also changes a little behavior; it does not pre-fill the new file with zeros, but allows it to be sparse, and does not truncate an existing device file. If you want to start with an empty file, just don't point it at an existing file... Fixes numerous bugs and I find the changes in behaviour sane and reasonable. A quick test failed for me when the image file does not exist: $ rm image $ ./mkfs.btrfs -r . image ... not enough free space add_file_items failed unable to traverse_directory Making image is aborted. ret = -1 mkfs.btrfs: mkfs.c:1533: main: Assertion `!(ret)' failed. seems that the estimated size is not sufficient. ho hum, thanks for checking. I guess I only tried it on a very small source directory. For now, figuring out btrfs space usage for a given set of files is above my pay grade and experience, I'm afraid. :( We should probably make a way to compact a given FS down to minimal size. Then we don't have to worry about size estimates at all. In other words, two passes instead of one. -chris -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs balance - hang/crash
o.k., I've deleted some files from my btrfs filesystem rebooted the machine and was able to cancel the balance operation thereafter. So far, everything seems to be working again. -Mike On Wed, Feb 6, 2013 at 11:26 PM, Michael Schneider mike2.schnei...@googlemail.com wrote: # btrfs fi show failed to read /dev/sr0 Label: 'bootssd' uuid: ac89584c-c757-488e-8a2a-5ee5a5484dde Total devices 1 FS bytes used 34.98GB devid1 size 56.00GB used 41.07GB path /dev/dm-1 Btrfs v0.19+ # btrfs fi df / Data: total=35.00GB, used=33.34GB System, DUP: total=32.00MB, used=16.00KB System: total=4.00MB, used=0.00 Metadata, DUP: total=3.00GB, used=1.63GB On Wed, Feb 6, 2013 at 11:21 PM, Hugo Mills h...@carfax.org.uk wrote: On Wed, Feb 06, 2013 at 11:12:14PM +0100, Michael Schneider wrote: Hi, my btrfs hangs when doing a balance operation. I'm using a 3.7.1 kernel from opensuse: linux-opzz 3.7.1-2.10-m4 #11 SMP PREEMPT Fri Jan 11 18:04:04 CET 2013 x86_64 x86_64 x86_64 GNU/Linux and Btrfs v0.19+ I did a scrub which completed without errors. Then I tried btrfs filesystem balance / which work fine for the first 23 of 46 chunks, then ist stopped processing further chunks, but contiued to consume large amounts of the CPU. The system logged filled quickly with messages like: [ 347.237658] btrfs: block rsv returned -28 [snip] Any idea what's going on? You're running out of space. Can you post the output of: # btrfs fi df /mountpoint # btrfs fi show this will give us a bit more info about likely scenarios. Also, josef published a patch(*) in the last few hours, on this list, which may help deal with the issue. Hugo. (*) Btrfs: rework the overcommit logic to be based on the total size -- === Hugo Mills: hugo@... carfax.org.uk | darksatanic.net | lug.org.uk === PGP key: 515C238D from wwwkeys.eu.pgp.net or http://www.carfax.org.uk --- What do you give the man who has everything? -- Penicillin is --- a good start... -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: steal from global reserve if we are cleaning up orphans
Sometimes xfstest 83 will fail to remount the scratch device because we've gotten ourselves so full that we cannot cleanup the orphan items. In this case check to see if we're doing the orphan cleanup and if we are allow us to steal our reservation from the global block rsv. With this patch I've not been able to reproduce the failed mount problem. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- fs/btrfs/ctree.h |5 + fs/btrfs/extent-tree.c | 10 ++ fs/btrfs/inode.c |5 - 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50d1be9..2b68f88 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1237,6 +1237,11 @@ struct seq_list { u64 seq; }; +enum btrfs_orphan_cleanup_state { + ORPHAN_CLEANUP_STARTED = 1, + ORPHAN_CLEANUP_DONE = 2, +}; + /* fs_info */ struct reloc_control; struct btrfs_device; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53c2898..8dbb45c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4099,6 +4099,16 @@ again: goto again; out: + if (ret == -ENOSPC + unlikely(root-orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { + struct btrfs_block_rsv *global_rsv = + root-fs_info-global_block_rsv; + + if (block_rsv != global_rsv + !btrfs_block_rsv_migrate(global_rsv, block_rsv, +orig_bytes)) + ret = 0; + } if (flushing) { spin_lock(space_info-lock); space_info-flush = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 44d95d1..73e9409 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2218,11 +2218,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) } } -enum btrfs_orphan_cleanup_state { - ORPHAN_CLEANUP_STARTED = 1, - ORPHAN_CLEANUP_DONE = 2, -}; - /* * This is called in transaction commit time. If there are no orphan * files in the subvolume, it removes orphan item and frees block_rsv -- 1.7.7.6 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: steal from global reserve if we are cleaning up orphans V2
Sometimes xfstest 83 will fail to remount the scratch device because we've gotten ourselves so full that we cannot cleanup the orphan items. In this case check to see if we're doing the orphan cleanup and if we are allow us to steal our reservation from the global block rsv. With this patch I've not been able to reproduce the failed mount problem. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- V1-V2: use block_rsv_use_bytes not migrate since we call add after we succeed in reserve_metadata_bytes. fs/btrfs/ctree.h |5 + fs/btrfs/extent-tree.c | 11 +++ fs/btrfs/inode.c |5 - 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50d1be9..2b68f88 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1237,6 +1237,11 @@ struct seq_list { u64 seq; }; +enum btrfs_orphan_cleanup_state { + ORPHAN_CLEANUP_STARTED = 1, + ORPHAN_CLEANUP_DONE = 2, +}; + /* fs_info */ struct reloc_control; struct btrfs_device; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53c2898..b1534ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -102,6 +102,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, u64 num_bytes, int reserve); +static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, + u64 num_bytes); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -4099,6 +4101,15 @@ again: goto again; out: + if (ret == -ENOSPC + unlikely(root-orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { + struct btrfs_block_rsv *global_rsv = + root-fs_info-global_block_rsv; + + if (block_rsv != global_rsv + !block_rsv_use_bytes(global_rsv, orig_bytes)) + ret = 0; + } if (flushing) { spin_lock(space_info-lock); space_info-flush = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 44d95d1..73e9409 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2218,11 +2218,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) } } -enum btrfs_orphan_cleanup_state { - ORPHAN_CLEANUP_STARTED = 1, - ORPHAN_CLEANUP_DONE = 2, -}; - /* * This is called in transaction commit time. If there are no orphan * files in the subvolume, it removes orphan item and frees block_rsv -- 1.7.7.6 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: cleanup orphan reservation if truncate fails
I noticed we were getting lots of warnings with xfstest 83 because we have reservations outstanding. This is because we moved the orphan add outside of the truncate, but we don't actually cleanup our reservation if something fails. This fixes the problem and I no longer see warnings. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- fs/btrfs/inode.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 73e9409..905297f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2538,6 +2538,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) goto out; ret = btrfs_truncate(inode); + if (ret) + btrfs_orphan_del(NULL, inode); } else { nr_unlink++; } -- 1.7.7.6 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC] Btrfs: Allow the compressed extent size limit to be modified v2
Provide for modification of the limit of compressed extent size utilizing mount-time configuration settings. The size of compressed extents was limited to 128K, which leads to fragmentation of the extents (although the extents themselves may still be located contiguously). This limit is put in place to ease the RAM required when spreading compression across several CPUs, and to make sure the amount of IO required to do a random read is reasonably small. Signed-off-by: Mitch Harder mitch.har...@sabayonlinux.org --- Changelog v1 - v2: - Use more self-documenting variable name: compressed_extent_size - max_compressed_extent_kb - Use #define BTRFS_DEFAULT_MAX_COMPR_EXTENTS instead of raw 128. - Fix min calculation for nr_pages. - Comment cleanup. - Use more self-documenting mount option parameter: compressed_extent_size - max_compressed_extent_kb - Fix formatting in btrfs_show_options. --- fs/btrfs/ctree.h | 6 ++ fs/btrfs/disk-io.c| 1 + fs/btrfs/inode.c | 8 fs/btrfs/relocation.c | 7 --- fs/btrfs/super.c | 20 +++- 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 547b7b0..a62f20c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -191,6 +191,9 @@ static int btrfs_csum_sizes[] = { 4, 0 }; /* ioprio of readahead is set to idle */ #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) +/* Default value for maximum compressed extent size (kb) */ +#define BTRFS_DEFAULT_MAX_COMPR_EXTENTS128 + /* * The key defines the order in the tree, and so it also defines (optimal) * block layout. @@ -1477,6 +1480,8 @@ struct btrfs_fs_info { unsigned data_chunk_allocations; unsigned metadata_ratio; + unsigned max_compressed_extent_kb; + void *bdev_holder; /* private scrub information */ @@ -1829,6 +1834,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_CHECK_INTEGRITY(1 20) #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 21) #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 22) +#define BTRFS_MOUNT_COMPR_EXTENT_SIZE (1 23) #define btrfs_clear_opt(o, opt)((o) = ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 830bc17..775e7ba 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2056,6 +2056,7 @@ int open_ctree(struct super_block *sb, fs_info-trans_no_join = 0; fs_info-free_chunk_space = 0; fs_info-tree_mod_log = RB_ROOT; + fs_info-max_compressed_extent_kb = BTRFS_DEFAULT_MAX_COMPR_EXTENTS; /* readahead state */ INIT_RADIX_TREE(fs_info-reada_tree, GFP_NOFS ~__GFP_WAIT); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 148abeb..78fc6eb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -346,8 +346,8 @@ static noinline int compress_file_range(struct inode *inode, unsigned long nr_pages_ret = 0; unsigned long total_compressed = 0; unsigned long total_in = 0; - unsigned long max_compressed = 128 * 1024; - unsigned long max_uncompressed = 128 * 1024; + unsigned long max_compressed = root-fs_info-max_compressed_extent_kb * 1024; + unsigned long max_uncompressed = root-fs_info-max_compressed_extent_kb * 1024; int i; int will_compress; int compress_type = root-fs_info-compress_type; @@ -361,7 +361,7 @@ static noinline int compress_file_range(struct inode *inode, again: will_compress = 0; nr_pages = (end PAGE_CACHE_SHIFT) - (start PAGE_CACHE_SHIFT) + 1; - nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); + nr_pages = min(nr_pages, max_compressed / PAGE_CACHE_SIZE); /* * we don't want to send crud past the end of i_size through @@ -386,7 +386,7 @@ again: * * We also want to make sure the amount of IO required to do * a random read is reasonably small, so we limit the size of -* a compressed extent to 128k. +* a compressed extent. */ total_compressed = min(total_compressed, max_uncompressed); num_bytes = (end - start + blocksize) ~(blocksize - 1); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 300e09a..64bbc9e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -144,7 +144,7 @@ struct tree_block { unsigned int key_ready:1; }; -#define MAX_EXTENTS 128 +#define MAX_EXTENTS 512 struct file_extent_cluster { u64 start; @@ -3055,6 +3055,7 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, struct file_extent_cluster *cluster) { int ret; + struct btrfs_fs_info *fs_info = BTRFS_I(inode)-root-fs_info; if (cluster-nr 0 extent_key-objectid != cluster-end + 1) { ret =
Re: [RFC] Btrfs: Allow the compressed extent size limit to be modified v2
On Thu, Feb 07, 2013 at 03:38:34PM -0600, Mitch Harder wrote: Provide for modification of the limit of compressed extent size utilizing mount-time configuration settings. The size of compressed extents was limited to 128K, which leads to fragmentation of the extents (although the extents themselves may still be located contiguously). This limit is put in place to ease the RAM required when spreading compression across several CPUs, and to make sure the amount of IO required to do a random read is reasonably small. Signed-off-by: Mitch Harder mitch.har...@sabayonlinux.org --- Changelog v1 - v2: - Use more self-documenting variable name: compressed_extent_size - max_compressed_extent_kb - Use #define BTRFS_DEFAULT_MAX_COMPR_EXTENTS instead of raw 128. - Fix min calculation for nr_pages. - Comment cleanup. - Use more self-documenting mount option parameter: compressed_extent_size - max_compressed_extent_kb - Fix formatting in btrfs_show_options. Cool, thanks for cleaning those up. It looks a lot better now. - z -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC] Btrfs: Allow the compressed extent size limit to be modified v2
On Thu, Feb 07, 2013 at 03:38:34PM -0600, Mitch Harder wrote: --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -144,7 +144,7 @@ struct tree_block { unsigned int key_ready:1; }; -#define MAX_EXTENTS 128 +#define MAX_EXTENTS 512 Is this really related to compression? IIRC I've seen it only in context of batch work in reloc, but not anywhere near compression. (I may be wrong of course, just checking). david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] Btrfs-progs: move btrfs-select-super.c
This patch moves btrfs-select-super.c - cmds-rescue-super-ops.c This is done in preparation to merge the select-super functionality to btrfs. The naming is because we will also merge btrfs-dump-super.c from Josef Bacik Signed-off-by: Ian Kumlien po...@demius.net --- btrfs-select-super.c = cmds-rescue-super-ops.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename btrfs-select-super.c = cmds-rescue-super-ops.c (100%) diff --git a/btrfs-select-super.c b/cmds-rescue-super-ops.c similarity index 100% rename from btrfs-select-super.c rename to cmds-rescue-super-ops.c -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/6] Btrfs-progs: Rename btrfsck.c - cmds-check.c
In preparation for merging btrfsck functionality in to btrfs. Signed-off-by: Ian Kumlien po...@demius.net --- btrfsck.c = cmds-check.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename btrfsck.c = cmds-check.c (100%) diff --git a/btrfsck.c b/cmds-check.c similarity index 100% rename from btrfsck.c rename to cmds-check.c -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Btrfs-progs: Merge btrfs-restore, btrfsck, btrfs-select-super, btrfs-dump-super and btrfs-debug-tree in to btrfs
Hi, This patch series moves some of the commands around to reflect that they are now subcommands of btrfs. As a stage in this we also add support for btrfs being called as btrfsck which yeilds the, now(?), starnard btrfs check or fsck.btrfs which is a noop to avoid complications with distributions. We also merge Josef Baciks btrfs-dump-super in patch number 4. This patchset has been rebased on top of David Sterbas latest WIP changes. Comments? Ideas? Suggestions? Please help me with all the help text and verify that it's correct enough ;) -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/6] Btrfs-progs: move debug-tree.c - cmds-rescue-debug-tree.c
The btrfs-debug-tree functionality will be integrated in to btrfs as btrfs rescue debug-tree Signed-off-by: Ian Kumlien po...@demius.net --- debug-tree.c = cmds-rescue-debug-tree.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename debug-tree.c = cmds-rescue-debug-tree.c (100%) diff --git a/debug-tree.c b/cmds-rescue-debug-tree.c similarity index 100% rename from debug-tree.c rename to cmds-rescue-debug-tree.c -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6] Btrfs-progs: add the rescue section to btrfs
the btrfs command now lists: btrfs rescue select-super -s number device Select a superblock btrfs rescue dump-super device Dump a superblock to disk btrfs rescue debug-tree [options] device Debug the filesystem btrfs-dump-super.c was imported in to cmds-rescue-super-ops.c This patch integrates all the functionality... cmds-rescue.c is used to glue cmds-rescue-debug-tree.c, cmds-rescue-restore.c and cmds-rescue-super-ops.c together to make the source files more managable. Signed-off-by: Ian Kumlien po...@demius.net --- Makefile | 34 +--- btrfs-dump-super.c | 87 --- btrfs.c | 2 + cmds-rescue-debug-tree.c | 44 ++-- cmds-rescue-super-ops.c | 103 +-- cmds-rescue.c| 48 ++ cmds-restore.c | 42 +++ commands.h | 8 +++- 8 files changed, 195 insertions(+), 173 deletions(-) delete mode 100644 btrfs-dump-super.c create mode 100644 cmds-rescue.c diff --git a/Makefile b/Makefile index 94541b2..7e2db76 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,9 @@ objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ send-stream.o send-utils.o qgroup.o raid6.o cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \ cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \ - cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o + cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \ + cmds-rescue.o cmds-rescue-super-ops.o \ + cmds-rescue-debug-tree.o cmds-restore.o CHECKFLAGS= -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ -Wuninitialized -Wshadow -Wundef @@ -17,8 +19,7 @@ DEPFLAGS = -Wp,-MMD,$(@D)/.$(@F).d,-MT,$@ INSTALL = install prefix ?= /usr/local bindir = $(prefix)/bin -LIBS=-luuid -lm -RESTORE_LIBS=-lz -llzo2 +LIBS=-luuid -lm -lz -llzo2 ifeq ($(origin V), command line) BUILD_VERBOSE = $(V) @@ -35,10 +36,9 @@ endif MAKEOPTS = --no-print-directory Q=$(Q) -progs = btrfsctl mkfs.btrfs btrfs-debug-tree btrfs-show btrfs-vol \ - btrfs btrfs-map-logical btrfs-image btrfs-zero-log btrfs-convert \ - btrfs-find-root btrfs-restore btrfstune btrfs-show-super \ - btrfs-dump-super +progs = btrfsctl mkfs.btrfs btrfs-show btrfs-vol btrfs \ + btrfs-map-logical btrfs-image btrfs-zero-log btrfs-convert \ + btrfs-find-root btrfstune \ # Create all the static targets static_objects = $(patsubst %.o, %.static.o, $(objects)) @@ -95,10 +95,6 @@ btrfs-find-root: $(objects) find-root.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o btrfs-find-root find-root.o $(objects) $(LDFLAGS) $(LIBS) -btrfs-restore: $(objects) restore.o - @echo [LD] $@ - $(Q)$(CC) $(CFLAGS) -o btrfs-restore restore.o $(objects) $(LDFLAGS) $(LIBS) $(RESTORE_LIBS) - btrfsctl: $(objects) btrfsctl.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o btrfsctl btrfsctl.o $(objects) $(LDFLAGS) $(LIBS) @@ -115,10 +111,6 @@ mkfs.btrfs: $(objects) mkfs.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) -lblkid -btrfs-debug-tree: $(objects) debug-tree.o - @echo [LD] $@ - $(Q)$(CC) $(CFLAGS) -o btrfs-debug-tree $(objects) debug-tree.o $(LDFLAGS) $(LIBS) - btrfs-zero-log: $(objects) btrfs-zero-log.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o btrfs-zero-log $(objects) btrfs-zero-log.o $(LDFLAGS) $(LIBS) @@ -127,14 +119,6 @@ btrfs-show-super: $(objects) btrfs-show-super.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o btrfs-show-super $(objects) btrfs-show-super.o $(LDFLAGS) $(LIBS) -btrfs-select-super: $(objects) btrfs-select-super.o - @echo [LD] $@ - $(Q)$(CC) $(CFLAGS) -o btrfs-select-super $(objects) btrfs-select-super.o $(LDFLAGS) $(LIBS) - -btrfs-dump-super: $(objects) btrfs-dump-super.o - @echo [LD] $@ - $(Q)$(CC) $(CFLAGS) -o btrfs-dump-super $(objects) btrfs-dump-super.o $(LDFLAGS) $(LIBS) - btrfstune: $(objects) btrfstune.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o btrfstune $(objects) btrfstune.o $(LDFLAGS) $(LIBS) @@ -175,8 +159,8 @@ install-man: clean : @echo Cleaning - $(Q)rm -f $(progs) cscope.out *.o .*.d btrfs-convert btrfs-image btrfs-select-super \ - btrfs-zero-log btrfstune btrfs-dump-super dir-test ioctl-test quick-test \ + $(Q)rm -f $(progs) cscope.out *.o .*.d btrfs-convert btrfs-image \ + btrfs-zero-log btrfstune dir-test ioctl-test quick-test \ version.h $(Q)$(MAKE) $(MAKEOPTS) -C man $@ diff --git a/btrfs-dump-super.c b/btrfs-dump-super.c deleted file mode 100644 index 033140c..000 ---
[PATCH 5/6] Btrfs-progs: restore.c - cmds-restore.c
The btrfs-restore functionality will be integrated in btrs as btrfs restore Signed-off-by: Ian Kumlien po...@demius.net --- restore.c = cmds-restore.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename restore.c = cmds-restore.c (100%) diff --git a/restore.c b/cmds-restore.c similarity index 100% rename from restore.c rename to cmds-restore.c -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/6] Btrfs-progs: add btrfsck functionality to btrfs
This patch includes the functionality of btrfs, it's found as btrfs check however it makes the binary behave differently depending on what it's run as. btrfsck - will act like normal btrfsck fsck.btrfs - noop Signed-off-by: Ian Kumlien po...@demius.net --- Makefile | 8 ++- btrfs.c | 68 cmds-check.c | 40 --- commands.h | 3 +++ 4 files changed, 78 insertions(+), 41 deletions(-) diff --git a/Makefile b/Makefile index 2c2a500..94541b2 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ send-stream.o send-utils.o qgroup.o raid6.o cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \ cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \ - cmds-quota.o cmds-qgroup.o cmds-replace.o + cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o CHECKFLAGS= -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \ -Wuninitialized -Wshadow -Wundef @@ -35,7 +35,7 @@ endif MAKEOPTS = --no-print-directory Q=$(Q) -progs = btrfsctl mkfs.btrfs btrfs-debug-tree btrfs-show btrfs-vol btrfsck \ +progs = btrfsctl mkfs.btrfs btrfs-debug-tree btrfs-show btrfs-vol \ btrfs btrfs-map-logical btrfs-image btrfs-zero-log btrfs-convert \ btrfs-find-root btrfs-restore btrfstune btrfs-show-super \ btrfs-dump-super @@ -111,10 +111,6 @@ btrfs-show: $(objects) btrfs-show.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o btrfs-show btrfs-show.o $(objects) $(LDFLAGS) $(LIBS) -btrfsck: $(objects) btrfsck.o - @echo [LD] $@ - $(Q)$(CC) $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS) - mkfs.btrfs: $(objects) mkfs.o @echo [LD] $@ $(Q)$(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) -lblkid diff --git a/btrfs.c b/btrfs.c index 7b0e50f..cf2f320 100644 --- a/btrfs.c +++ b/btrfs.c @@ -48,8 +48,13 @@ int prefixcmp(const char *str, const char *prefix) return (unsigned char)*prefix - (unsigned char)*str; } -static int parse_one_token(const char *arg, const struct cmd_group *grp, - const struct cmd_struct **cmd_ret) +#define parse_one_token(arg, grp, cmd_ret) \ + _parse_one_token((arg), (grp), (cmd_ret), 0) +#define parse_one_exact_token(arg, grp, cmd_ret) \ + _parse_one_token((arg), (grp), (cmd_ret), 1) + +static int _parse_one_token(const char *arg, const struct cmd_group *grp, + const struct cmd_struct **cmd_ret, int exact) { const struct cmd_struct *cmd = grp-commands; const struct cmd_struct *abbrev_cmd = NULL, *ambiguous_cmd = NULL; @@ -80,12 +85,15 @@ static int parse_one_token(const char *arg, const struct cmd_group *grp, return 0; } - if (ambiguous_cmd) - return -2; + if (!exact) + { + if (ambiguous_cmd) + return -2; - if (abbrev_cmd) { - *cmd_ret = abbrev_cmd; - return 0; + if (abbrev_cmd) { + *cmd_ret = abbrev_cmd; + return 0; + } } return -1; @@ -246,6 +254,7 @@ const struct cmd_group btrfs_cmd_group = { { balance, cmd_balance, NULL, balance_cmd_group, 0 }, { device, cmd_device, NULL, device_cmd_group, 0 }, { scrub, cmd_scrub, NULL, scrub_cmd_group, 0 }, + { check, cmd_check, cmd_check_usage, NULL, 0 }, { inspect-internal, cmd_inspect, NULL, inspect_cmd_group, 0 }, { send, cmd_send, cmd_send_usage, NULL, 0 }, { receive, cmd_receive, cmd_receive_usage, NULL, 0 }, @@ -258,24 +267,47 @@ const struct cmd_group btrfs_cmd_group = { }, }; +static int cmd_dummy(int argc, char **argv) +{ + return 0; +} + +/* change behaviour depending on what we're called */ +const struct cmd_group function_cmd_group = { + NULL, NULL, + { + { btrfsck, cmd_check, NULL, NULL, 0 }, + { fsck.btrfs, cmd_dummy, NULL, NULL, 0 }, + { 0, 0, 0, 0, 0 } + }, +}; + int main(int argc, char **argv) { const struct cmd_struct *cmd; + char *called_as = strrchr(argv[0], '/'); + if (called_as) + argv[0] = ++called_as; crc32c_optimization_init(); - argc--; - argv++; - handle_options(argc, argv); - if (argc 0) { - if (!prefixcmp(argv[0], --)) - argv[0] += 2; - } else { - usage_command_group(btrfs_cmd_group, 0, 0); - exit(1); - } + /* if we have cmd, we're started as a sub command */ + if
Re: [RFC] Btrfs: Allow the compressed extent size limit to be modified v2
On Thu, Feb 7, 2013 at 6:28 PM, David Sterba d...@jikos.cz wrote: On Thu, Feb 07, 2013 at 03:38:34PM -0600, Mitch Harder wrote: --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -144,7 +144,7 @@ struct tree_block { unsigned int key_ready:1; }; -#define MAX_EXTENTS 128 +#define MAX_EXTENTS 512 Is this really related to compression? IIRC I've seen it only in context of batch work in reloc, but not anywhere near compression. (I may be wrong of course, just checking). When you defragment compressed extents, it will run through relocation. If autodefrag is enabled, I found most everything I touched was running through relocation. It has been a while since I looked at the issue, but I think balancing your data will also run through relocation. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: fix the deadlock between the transaction attach and commit
Here is the whole story: Trans_Attach_Task Trans_Commit_Task btrfs_commit_transaction() |-wait writers to be 1 btrfs_attach_transaction() | btrfs_commit_transaction() | | |-set trans_no_join to 1 | | (close join transaction) |-btrfs_run_ordered_operations | (Those ordered operations| are added when releasing| file) | |-btrfs_join_transaction() | |-wait_commit() | |-wait writers to be 1 Then these two tasks waited for each other. As we know, btrfs_attach_transaction() is used to catch the current transaction, and commit it, so if someone has committed the transaction, it is unnecessary to join it and commit it, wait is the best choice for it. In this way, we can fix the above problem. Signed-off-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/transaction.c |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f154946..7be9d5e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -285,6 +285,14 @@ static int may_wait_transaction(struct btrfs_root *root, int type) if (type == TRANS_USERSPACE) return 1; + /* +* If we are ATTACH, it means we just want to catch the current +* transaction and commit it. So if someone is committing the +* current transaction now, it is very glad to wait it. +*/ + if (type == TRANS_ATTACH) + return 1; + if (type == TRANS_START !atomic_read(root-fs_info-open_ioctl_trans)) return 1; -- 1.6.5.2 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V2 1/2] Btrfs: serialize unlocked dio reads with truncate
Currently, we can do unlocked dio reads, but the following race is possible: dio_read_task truncate_task -btrfs_setattr() -btrfs_direct_IO -__blockdev_direct_IO -btrfs_get_block -btrfs_truncate() #alloc truncated blocks #to other inode -submit_io() #INFORMATION LEAK In order to avoid this problem, we must serialize unlocked dio reads with truncate. There are two approaches: - use extent lock to protect the extent that we truncate - use inode_dio_wait() to make sure the truncating task will wait for the read DIO. If we use the 1st one, we will meet the endless truncation problem due to the nonlocked read DIO after we implement the nonlocked write DIO. It is because we still need invoke inode_dio_wait() avoid the race between write DIO and truncation. By that time, we have to introduce btrfs_inode_{block, resume}_nolock_dio() again. That is we have to implement this patch again, so I choose the 2nd way to fix the problem. Signed-off-by: Miao Xie mi...@cn.fujitsu.com --- Changlog v1 - v2: - Rebase the patch against the following one: [RFC][PATCH] Btrfs: fix deadlock due to unsubmitted - Modify the changelog to explain why we don't choose the extent lock to fix the bug --- fs/btrfs/btrfs_inode.h | 19 +++ fs/btrfs/inode.c | 23 +-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 2a8c242..00e2601 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -40,6 +40,7 @@ #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 #define BTRFS_INODE_NEEDS_FULL_SYNC7 #define BTRFS_INODE_COPY_EVERYTHING8 +#define BTRFS_INODE_READDIO_NEED_LOCK 9 /* in memory btrfs inode */ struct btrfs_inode { @@ -216,4 +217,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) return 0; } +/* + * Disable DIO read nolock optimization, so new dio readers will be forced + * to grab i_mutex. It is used to avoid the endless truncate due to + * nonlocked dio read. + */ +static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) +{ + set_bit(BTRFS_INODE_READDIO_NEED_LOCK, BTRFS_I(inode)-runtime_flags); + smp_mb(); +} + +static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) +{ + smp_mb__before_clear_bit(); + clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, + BTRFS_I(inode)-runtime_flags); +} + #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c5d829d..a49be05 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3832,6 +3832,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) /* we don't support swapfiles, so vmtruncate shouldn't fail */ truncate_setsize(inode, newsize); + + /* Disable nonlocked read DIO to avoid the end less truncate */ + btrfs_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + btrfs_inode_resume_unlocked_dio(inode); + ret = btrfs_truncate(inode); if (ret inode-i_nlink) btrfs_orphan_del(NULL, inode); @@ -6615,6 +6621,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb-ki_filp; struct inode *inode = file-f_mapping-host; size_t count = 0; + int flags = 0; + bool wakeup = false; ssize_t ret; if (check_direct_IO(BTRFS_I(inode)-root, rw, iocb, iov, @@ -6626,13 +6634,22 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) return ret; + } else { + atomic_inc(inode-i_dio_count); + smp_mb__after_atomic_inc(); + if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, + BTRFS_I(inode)-runtime_flags))) { + inode_dio_done(inode); + flags = DIO_LOCKING | DIO_SKIP_HOLES; + } else { + wakeup = true; + } } ret = __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)-root-fs_info-fs_devices-latest_bdev, iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, - btrfs_submit_direct, 0); - + btrfs_submit_direct, flags); if (rw WRITE) { if (ret 0 ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); @@ -6645,6 +6662,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, } btrfs_delalloc_release_metadata(inode, 0); } + if
[PATCH V2 2/2] Btrfs: implement unlocked dio write
This idea is from ext4. By this patch, we can make the dio write parallel, and improve the performance. But because we can not update isize without i_mutex, the unlocked dio write just can be done in front of the EOF. We needn't worry about the race between dio write and truncate, because the truncate need wait untill all the dio write end. And we also needn't worry about the race between dio write and punch hole, because we have extent lock to protect our operation. I ran fio to test the performance of this feature. == Hardware == CPU: Intel(R) Core(TM)2 Duo CPU E7500 @ 2.93GHz Mem: 2GB SSD: Intel X25-M 120GB (Test Partition: 60GB) == config file == [global] ioengine=psync direct=1 bs=4k size=32G runtime=60 directory=/mnt/btrfs/ filename=testfile group_reporting thread [file1] numjobs=1 # 2 4 rw=randwrite == result (KBps) == write 1 2 4 lock24936 24738 24726 nolock 24962 30866 32101 == result (iops) == write 1 2 4 lock623461846181 nolock 624077168025 Signed-off-by: Miao Xie mi...@cn.fujitsu.com --- Changelog v1 - v2: - don't do nolocked DIO write if it is beyond the EOF --- fs/btrfs/inode.c | 35 +++ 1 files changed, 23 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a49be05..2948123 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6622,28 +6622,36 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file-f_mapping-host; size_t count = 0; int flags = 0; - bool wakeup = false; + bool wakeup = true; + bool relock = false; ssize_t ret; if (check_direct_IO(BTRFS_I(inode)-root, rw, iocb, iov, offset, nr_segs)) return 0; + atomic_inc(inode-i_dio_count); + smp_mb__after_atomic_inc(); + if (rw WRITE) { count = iov_length(iov, nr_segs); + /* +* If the write DIO is beyond the EOF, we need update +* the isize, but it is protected by i_mutex. So we can +* not unlock the i_mutex at this case. +*/ + if (offset + count = inode-i_size) { + mutex_unlock(inode-i_mutex); + relock = true; + } ret = btrfs_delalloc_reserve_space(inode, count); if (ret) - return ret; - } else { - atomic_inc(inode-i_dio_count); - smp_mb__after_atomic_inc(); - if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, - BTRFS_I(inode)-runtime_flags))) { - inode_dio_done(inode); - flags = DIO_LOCKING | DIO_SKIP_HOLES; - } else { - wakeup = true; - } + goto out; + } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, +BTRFS_I(inode)-runtime_flags))) { + inode_dio_done(inode); + flags = DIO_LOCKING | DIO_SKIP_HOLES; + wakeup = false; } ret = __blockdev_direct_IO(rw, iocb, inode, @@ -6662,8 +6670,11 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, } btrfs_delalloc_release_metadata(inode, 0); } +out: if (wakeup) inode_dio_done(inode); + if (relock) + mutex_lock(inode-i_mutex); return ret; } -- 1.6.5.2 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html