[PATCH] Btrfs: do not zero the page if it's in a hole
Don't bother zeroing the page if it's already a hole under there. We can save one allocation from this. Signed-off-by: Li Dongyang jerry87...@gmail.com --- fs/btrfs/inode.c | 17 + 1 file changed, 17 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 85a1e50..017052e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3481,6 +3481,7 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, struct extent_io_tree *io_tree = BTRFS_I(inode)-io_tree; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; + struct extent_map *em = NULL; char *kaddr; u32 blocksize = root-sectorsize; pgoff_t index = from PAGE_CACHE_SHIFT; @@ -3538,6 +3539,20 @@ again: goto again; } + em = btrfs_get_extent_fiemap(inode, NULL, 0, page_start, +PAGE_CACHE_SIZE, 0); + if (!IS_ERR_OR_NULL(em) em-block_start == EXTENT_MAP_HOLE) { + u64 em_end = extent_map_end(em); + if (em-start = page_start + em_end = page_start + PAGE_CACHE_SIZE) { + btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); + unlock_extent_cached(io_tree, page_start, page_end, +cached_state, GFP_NOFS); + ret = 0; + goto out_unlock; + } + } + clear_extent_bit(BTRFS_I(inode)-io_tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, @@ -3574,6 +3589,8 @@ out_unlock: unlock_page(page); page_cache_release(page); out: + if (em) + free_extent_map(em); return ret; } -- 1.7.12.3 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: try harder when we merge holes
We should look at path-slots[0] rather than path-slots[0]+1 while trying to merge with the hole behind us. Also this patch will delete the the latter one if we can merge with both front and back, leaving one hole covers all three. Signed-off-by: Li Dongyang jerry87...@gmail.com --- fs/btrfs/file.c | 133 1 file changed, 76 insertions(+), 57 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9ab1bed..d41805a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1663,89 +1663,108 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } -static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, - int slot, u64 start, u64 end) -{ - struct btrfs_file_extent_item *fi; - struct btrfs_key key; - - if (slot 0 || slot = btrfs_header_nritems(leaf)) - return 0; - - btrfs_item_key_to_cpu(leaf, key, slot); - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) - return 0; - - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) - return 0; - - if (btrfs_file_extent_disk_bytenr(leaf, fi)) - return 0; - - if (key.offset == end) - return 1; - if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) - return 1; - return 0; -} - -static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, - struct btrfs_path *path, u64 offset, u64 end) +static int merge_holes(struct btrfs_trans_handle *trans, struct inode *inode, + struct btrfs_path *path, u64 offset, u64 end) { struct btrfs_root *root = BTRFS_I(inode)-root; struct extent_buffer *leaf; - struct btrfs_file_extent_item *fi; - struct extent_map *hole_em; - struct extent_map_tree *em_tree = BTRFS_I(inode)-extent_tree; + struct btrfs_file_extent_item *back_fi, *front_fi; struct btrfs_key key; int ret; + bool front_mergeable = false; + bool back_mergeable = false; key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = offset; - 
ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret 0) return ret; BUG_ON(!ret); leaf = path-nodes[0]; - if (hole_mergeable(inode, leaf, path-slots[0]-1, offset, end)) { - u64 num_bytes; + if (path-slots[0] != 0) { + btrfs_item_key_to_cpu(leaf, key, path-slots[0]-1); + if (key.objectid == btrfs_ino(inode) + key.type == BTRFS_EXTENT_DATA_KEY) { + front_fi = btrfs_item_ptr(leaf, path-slots[0]-1, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, front_fi) == + BTRFS_FILE_EXTENT_REG + btrfs_file_extent_disk_bytenr(leaf, + front_fi) == 0 + key.offset + + btrfs_file_extent_num_bytes(leaf, front_fi) == + offset) { + front_mergeable = true; + } + } + } - path-slots[0]--; - fi = btrfs_item_ptr(leaf, path-slots[0], - struct btrfs_file_extent_item); - num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + - end - offset; - btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); - btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); - btrfs_set_file_extent_offset(leaf, fi, 0); - btrfs_mark_buffer_dirty(leaf); - goto out; + btrfs_item_key_to_cpu(leaf, key, path-slots[0]); + if (key.objectid == btrfs_ino(inode) + key.type == BTRFS_EXTENT_DATA_KEY) { + back_fi = btrfs_item_ptr(leaf, path-slots[0], +struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, back_fi) == + BTRFS_FILE_EXTENT_REG + btrfs_file_extent_disk_bytenr(leaf, back_fi) == 0 + key.offset == end) { + back_mergeable = true; + } } - if (hole_mergeable(inode, leaf, path-slots[0]+1, offset, end)) { + if (front_mergeable) { + u64 num_bytes = 0; + + if (back_mergeable) { + num_bytes = btrfs_file_extent_num_bytes(leaf, back_fi); + + ret = btrfs_del_item(trans, root, path
Re: [PATCH 1/2] Btrfs: fix space leak when skipping small extents during trimming
On Tuesday, June 21, 2011 01:49:19 PM Li Zefan wrote: We're taking a free space extent out of the free space cache, trimming it and then putting it back into the cache. sorry for the late reply, I can hardly to find time look at this. However for an extent that is smaller than the specified minimum length, it's taken out but won't be put back, which causes space leak. yes, you are correct, and the fix looks good to me, Thanks Signed-off-by: Li Zefan l...@cn.fujitsu.com --- Unfortunately I have no trim-able device to test the patch. --- fs/btrfs/free-space-cache.c | 34 +- 1 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9f985a4..292c0d9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2460,6 +2460,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, u64 bytes = 0; u64 actually_trimmed; int ret = 0; + int update_ret; *trimmed = 0; @@ -2483,6 +2484,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, } if (entry-bitmap) { + bytes = 0; ret = search_bitmap(ctl, entry, start, bytes); if (!ret) { if (start = end) { @@ -2490,6 +2492,8 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, break; } bytes = min(bytes, end - start); + if (bytes minlen) + goto next; bitmap_clear_bits(ctl, entry, start, bytes); if (entry-bytes == 0) free_bitmap(ctl, entry); @@ -2503,33 +2507,29 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, } else { start = entry-offset; bytes = min(entry-bytes, end - start); + if (bytes minlen) + goto next; unlink_free_space(ctl, entry); kmem_cache_free(btrfs_free_space_cachep, entry); } spin_unlock(ctl-tree_lock); - if (bytes = minlen) { - int update_ret; - update_ret = btrfs_update_reserved_bytes(block_group, - bytes, 1, 1); + update_ret = btrfs_update_reserved_bytes(block_group, + bytes, 1, 1); - ret = btrfs_error_discard_extent(fs_info-extent_root, - 
start, - bytes, - actually_trimmed); + ret = btrfs_error_discard_extent(fs_info-extent_root, start, + bytes, actually_trimmed); - btrfs_add_free_space(block_group, start, bytes); - if (!update_ret) - btrfs_update_reserved_bytes(block_group, - bytes, 0, 1); + btrfs_add_free_space(block_group, start, bytes); + if (!update_ret) + btrfs_update_reserved_bytes(block_group, bytes, 0, 1); - if (ret) - break; - *trimmed += actually_trimmed; - } + if (ret) + break; + *trimmed += actually_trimmed; +next: start += bytes; - bytes = 0; if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] Btrfs: add discard flag to btrfs_device
With discard flag in btrfs_device, we will only push trim request to the devices support that. Now we don't return EOPNOTSUPP to the caller, so we won't trigger BUG_ONs in the walk_log_tree functions if we mount a drive without DISCARD using -o discard, but it is still possible if we get errors from blkdev_issue_discard. This won't affect the return value of fstrim on the drives without DISCARD, because we've already checked that in btrfs_ioctl_fitrim, Thanks Changes v2: better name for the discard flag Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/extent-tree.c | 24 fs/btrfs/volumes.c |4 fs/btrfs/volumes.h |1 + 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1f61bf5..81ccae2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1774,7 +1774,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, u64 discarded_bytes = 0; struct btrfs_multi_bio *multi = NULL; - /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(root-fs_info-mapping_tree, REQ_DISCARD, bytenr, num_bytes, multi, 0); @@ -1782,25 +1781,26 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, struct btrfs_bio_stripe *stripe = multi-stripes; int i; - for (i = 0; i multi-num_stripes; i++, stripe++) { - ret = btrfs_issue_discard(stripe-dev-bdev, - stripe-physical, - stripe-length); - if (!ret) - discarded_bytes += stripe-length; - else if (ret != -EOPNOTSUPP) - break; + if (stripe-dev-has_trim) { + ret = btrfs_issue_discard(stripe-dev-bdev, + stripe-physical, + stripe-length); + if (!ret) + discarded_bytes += stripe-length; + else if (ret == -EOPNOTSUPP) { + stripe-dev-has_trim = 0; + ret = 0; + } else + break; + } } kfree(multi); } - if (discarded_bytes ret == -EOPNOTSUPP) - ret = 0; if (actual_bytes) *actual_bytes = discarded_bytes; - return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1efa56e..3d86b35 100644 --- 
a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -341,6 +341,7 @@ static noinline int device_list_add(const char *path, device-work.func = pending_bios_fn; memcpy(device-uuid, disk_super-dev_item.uuid, BTRFS_UUID_SIZE); + device-has_trim = 1; spin_lock_init(device-io_lock); device-name = kstrdup(path, GFP_NOFS); if (!device-name) { @@ -408,6 +409,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) device-devid = orig_dev-devid; device-work.func = pending_bios_fn; memcpy(device-uuid, orig_dev-uuid, sizeof(device-uuid)); + device-has_trim = 1; spin_lock_init(device-io_lock); INIT_LIST_HEAD(device-dev_list); INIT_LIST_HEAD(device-dev_alloc_list); @@ -1612,6 +1614,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) lock_chunks(root); device-writeable = 1; + device-has_trim = 1; device-work.func = pending_bios_fn; generate_random_uuid(device-uuid); spin_lock_init(device-io_lock); @@ -3342,6 +3345,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, return NULL; list_add(device-dev_list, fs_devices-devices); + device-has_trim = 1; device-dev_root = root-fs_info-dev_root; device-devid = devid; device-work.func = pending_bios_fn; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61..376d71c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -45,6 +45,7 @@ struct btrfs_device { int running_pending; u64 generation; + int has_trim; int writeable; int in_fs_metadata; int missing; -- 1.7.5.4 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org
Re: [PATCH 2/2] Btrfs: fix space leak when trimming free extents
On Tuesday, June 21, 2011 01:50:10 PM Li Zefan wrote: When the end of an extent exceeds the end of the specified range, the extent will be accidentally truncated. Signed-off-by: Li Zefan l...@cn.fujitsu.com --- fs/btrfs/free-space-cache.c |9 - 1 files changed, 8 insertions(+), 1 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 292c0d9..185cf8e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2509,8 +2509,15 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, bytes = min(entry-bytes, end - start); if (bytes minlen) goto next; + unlink_free_space(ctl, entry); - kmem_cache_free(btrfs_free_space_cachep, entry); + if (bytes entry-bytes) { + entry-offset = entry-offset + bytes; + entry-bytes = entry-bytes - bytes; + link_free_space(ctl, entry); yes, I forgot to link the rest extent to the free space cache. Thanks for the fix! + } else { + kmem_cache_free(btrfs_free_space_cachep, entry); + } } spin_unlock(ctl-tree_lock); -- 1.7.3.1 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: add discard flag to btrfs_device and make btrfs_discard_extent aware of that
With discard flag in btrfs_device, we will only push trim request to the devices support that. Now we don't return EOPNOTSUPP to the caller, so we won't trigger BUG_ONs in the walk_log_tree functions if we mount a drive without DISCARD using -o discard, but it is still possible if we get errors from blkdev_issue_discard. This won't affect the return value of fstrim on the drives without DISCARD, because we've already checked that in btrfs_ioctl_fitrim, Thanks Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/extent-tree.c | 24 fs/btrfs/volumes.c |4 fs/btrfs/volumes.h |1 + 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5b9b6b6..507cf8d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1774,7 +1774,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, u64 discarded_bytes = 0; struct btrfs_multi_bio *multi = NULL; - /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(root-fs_info-mapping_tree, REQ_DISCARD, bytenr, num_bytes, multi, 0); @@ -1782,25 +1781,26 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, struct btrfs_bio_stripe *stripe = multi-stripes; int i; - for (i = 0; i multi-num_stripes; i++, stripe++) { - ret = btrfs_issue_discard(stripe-dev-bdev, - stripe-physical, - stripe-length); - if (!ret) - discarded_bytes += stripe-length; - else if (ret != -EOPNOTSUPP) - break; + if (stripe-dev-discard) { + ret = btrfs_issue_discard(stripe-dev-bdev, + stripe-physical, + stripe-length); + if (!ret) + discarded_bytes += stripe-length; + else if (ret == -EOPNOTSUPP) { + stripe-dev-discard = 0; + ret = 0; + } else + break; + } } kfree(multi); } - if (discarded_bytes ret == -EOPNOTSUPP) - ret = 0; if (actual_bytes) *actual_bytes = discarded_bytes; - return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index da541df..bdf5604 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ 
-341,6 +341,7 @@ static noinline int device_list_add(const char *path, device-work.func = pending_bios_fn; memcpy(device-uuid, disk_super-dev_item.uuid, BTRFS_UUID_SIZE); + device-discard = 1; spin_lock_init(device-io_lock); device-name = kstrdup(path, GFP_NOFS); if (!device-name) { @@ -408,6 +409,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) device-devid = orig_dev-devid; device-work.func = pending_bios_fn; memcpy(device-uuid, orig_dev-uuid, sizeof(device-uuid)); + device-discard = 1; spin_lock_init(device-io_lock); INIT_LIST_HEAD(device-dev_list); INIT_LIST_HEAD(device-dev_alloc_list); @@ -1616,6 +1618,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) lock_chunks(root); device-writeable = 1; + device-discard = 1; device-work.func = pending_bios_fn; generate_random_uuid(device-uuid); spin_lock_init(device-io_lock); @@ -3346,6 +3349,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, return NULL; list_add(device-dev_list, fs_devices-devices); + device-discard = 1; device-dev_root = root-fs_info-dev_root; device-devid = devid; device-work.func = pending_bios_fn; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61..9f7e56c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -45,6 +45,7 @@ struct btrfs_device { int running_pending; u64 generation; + int discard; int writeable; int in_fs_metadata; int missing; -- 1.7.5.4 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: add discard flag to btrfs_device and make btrfs_discard_extent aware of that
On Friday, June 10, 2011 08:00:17 AM David Sterba wrote: On Thu, Jun 09, 2011 at 03:28:09PM +0800, Li Dongyang wrote: --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -45,6 +45,7 @@ struct btrfs_device { int running_pending; u64 generation; + int discard; can you pick a better name? this does not describe that it's the capability of the device, but rather 'do a discard'. I feel the same, I picked the name because there was btrfs_device->barriers, and it was removed in commit c3b9a62c8f932f32a733d6b628f61f3f28345727 something like has_discard, can_discard, has_trim, etc int writeable; int in_fs_metadata; int missing; otherwise the patch looks good (and matches my view how to do it). I will test it eventually. Thanks a lot, I'll resend this with a proper name. david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs: fix crash when no drive supports DISCARD
On Friday, May 20, 2011 10:15:46 PM Josef Bacik wrote: On 05/20/2011 07:52 AM, David Sterba wrote: On Wed, May 18, 2011 at 11:29:14AM +0800, Li Dongyang wrote: Thanks for the fix, I thought EOPNOTSUPP could be useful to the caller at that time, and maybe we could do something like remove the discard mount_opt in the fs_info so we can avoid calling it again. I do not agree that discard should be removed from mount_opt, because one may add TRIM-capable devices later, it's a whole filesystem option, while. A disappeared discard mount option will probably cause panic in administrator's head. However, if a drive does not support TRIM, the btrfs_issue_discard calls can take a shortcut and do not call up to blkdev_issue_discard (though it does return immediately), caching the state after first call. But this is a matter of the lower level call (blkdev) and should not be propagated beyond to the extent level (ie. btrfs_discard_extent). There ought to just be a flag added to btrfs_device to say whether or not it supports discard, and if we get back EOPNOTSUPP we stop putting discards down on that device, that way if we have some devices that do it and some that don't (like for instance if we do that tiered caching with ssd's thing) we can make sure discard is actually done on drives that care about it. Thanks, Josef This should be the best way. But if we do not export the EOPNOTSUPP to callers of btrfs_discard_extent, we will get no errors if we run fstrim on a btrfs where none of the devices support trim, and I think this is not what we want. Before commit 5378e60734f5b7bfe1b43dc191aaf6131c1befe7, btrfs_discard_extent will only return the error of btrfs_map_block, so I think we should move the BUG_ONs into btrfs_discard_extent from the callers as actually they are testing the result of btrfs_map_block. 
Thanks Li Dongyang -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] btrfs: fix crash when no drive supports DISCARD
On Wednesday, May 18, 2011 12:00:31 AM David Sterba wrote: xfstests/013 crashes when the test partition is mounted with -o discard: walk_up_log_tree btrfs_free_reserved_extent btrfs_discard_extent return -EOPNOTSUPP BUG_ON ret btrfs_discard_extent() should be fine when drive does not support the DISCARD operation and filter the EOPNOTSUPP retcode, but currently it does this only when some bytes were successfully discarded. Signed-off-by: David Sterba dste...@suse.cz CC: sta...@kernel.org --- fs/btrfs/extent-tree.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ee6bd5..feab2ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1790,7 +1790,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, } kfree(multi); } - if (discarded_bytes && ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP) ret = 0; if (actual_bytes) Thanks for the fix, I thought EOPNOTSUPP could be useful to the caller at that time, and maybe we could do something like remove the discard mount_opt in the fs_info so we can avoid calling it again. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V4 0/4] Btrfs: batched discard support for btrfs
On Monday, March 28, 2011 09:39:26 AM Chris Mason wrote: Excerpts from Chris Mason's message of 2011-03-27 21:30:20 -0400: Excerpts from Chris Mason's message of 2011-03-27 14:10:46 -0400: Excerpts from Li Dongyang's message of 2011-03-24 06:24:24 -0400: Dear list, This is V4 of batched discard support, now we will get full mapping of the free space on each device for RAID0/1/10/DUP instead of just a single stripe length, and tested with xfstests 251, Thanks. I've pushed this out into the for-linus branch, along with a full merge to 2.6.39 current git. Please take a look and make sure I've merged it correctly. Looks good to me. Hmmm, this was doing mod operations on 64 bit numbers, so it didn't compile at all on 32 bit machines. I've fixed it up and pushed the result out to for-linus. Please check the math ;) sorry for being so stupid, thanks for fixing ;-) Br, Li Dongyang BTW, I just rebased this so the incremental fix was before merging into Linus' tree. -chris -chris Thanks! -chris Changelog V4: *make btrfs_map_block() return full mapping. Changelog V3: *fix style problems. *rebase to 2.6.38-rc7. Changelog V2: *Check if we have devices that support trim before trying to trim the fs, also adjust minlen according to the discard_granularity. *Update reserved extent calculations in btrfs_trim_block_group(). *Call cond_resched() without checking need_resched() *Use bitmap_clear_bits() and unlink_free_space() instead of btrfs_remove_free_space(), so we won't search the same extent twice. *Try harder in btrfs_discard_extent(), now we won't report errors if it's not a EOPNOTSUPP. *make sure the block group is cached before trimming it, or we'll see an empty caching tree if the block group is not cached. *Minor return value fix in btrfs_discard_block_group(). -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V4 4/4] Btrfs: add btrfs_trim_fs() to handle FITRIM
We take an free extent out from allocator, trim it, then put it back, but before we trim the block group, we should make sure the block group is cached, so plus a little change to make cache_block_group() run without a transaction. Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/ctree.h|1 + fs/btrfs/extent-tree.c | 50 +++- fs/btrfs/free-space-cache.c | 92 +++ fs/btrfs/free-space-cache.h |2 + fs/btrfs/ioctl.c| 46 + 5 files changed, 190 insertions(+), 1 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 94bb772..df206c1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2232,6 +2232,7 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 10e542a..d876759 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -440,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. 
*/ - if (!trans-transaction-in_commit + if (trans (!trans-transaction-in_commit) (root root != root-fs_info-tree_root)) { spin_lock(cache-lock); if (cache-cached != BTRFS_CACHE_NO) { @@ -8739,3 +8739,51 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, { return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); } + +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) +{ + struct btrfs_fs_info *fs_info = root-fs_info; + struct btrfs_block_group_cache *cache = NULL; + u64 group_trimmed; + u64 start; + u64 end; + u64 trimmed = 0; + int ret = 0; + + cache = btrfs_lookup_block_group(fs_info, range-start); + + while (cache) { + if (cache-key.objectid = (range-start + range-len)) { + btrfs_put_block_group(cache); + break; + } + + start = max(range-start, cache-key.objectid); + end = min(range-start + range-len, + cache-key.objectid + cache-key.offset); + + if (end - start = range-minlen) { + if (!block_group_cache_done(cache)) { + ret = cache_block_group(cache, NULL, root, 0); + if (!ret) + wait_block_group_cache_done(cache); + } + ret = btrfs_trim_block_group(cache, +group_trimmed, +start, +end, +range-minlen); + + trimmed += group_trimmed; + if (ret) { + btrfs_put_block_group(cache); + break; + } + } + + cache = next_block_group(fs_info-tree_root, cache); + } + + range-len = trimmed; + return ret; +} diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a039065..d0dc812 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2154,3 +2154,95 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) cluster-block_group = NULL; } +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space *entry = NULL; + struct btrfs_fs_info *fs_info = block_group-fs_info; + u64 bytes = 0; + u64 actually_trimmed; + int ret = 0; + + *trimmed = 0; + + while (start end) { + 
spin_lock(block_group-tree_lock); + + if (block_group-free_space minlen) { + spin_unlock(block_group-tree_lock); + break; + } + + entry = tree_search_offset(block_group, start, 0, 1); + if (!entry) + entry = tree_search_offset(block_group, + offset_to_bitmap(block_group, + start
[PATCH V4 2/4] Btrfs: make btrfs_map_block() return entire free extent for each device of RAID0/1/10/DUP
btrfs_map_block() will only return a single stripe length, but we want the full extent be mapped to each disk when we are trimming the extent, so we add length to btrfs_bio_stripe and fill it if we are mapping for REQ_DISCARD. Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/volumes.c | 150 fs/btrfs/volumes.h |1 + 2 files changed, 129 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd13eb8..e81cce6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2962,7 +2962,10 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, struct extent_map_tree *em_tree = map_tree-map_tree; u64 offset; u64 stripe_offset; + u64 stripe_end_offset; u64 stripe_nr; + u64 stripe_nr_orig; + u64 stripe_nr_end; int stripes_allocated = 8; int stripes_required = 1; int stripe_index; @@ -2971,7 +2974,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int max_errors = 0; struct btrfs_multi_bio *multi = NULL; - if (multi_ret !(rw REQ_WRITE)) + if (multi_ret !(rw (REQ_WRITE | REQ_DISCARD))) stripes_allocated = 1; again: if (multi_ret) { @@ -3017,7 +3020,15 @@ again: max_errors = 1; } } - if (multi_ret (rw REQ_WRITE) + if (rw REQ_DISCARD) { + if (map-type (BTRFS_BLOCK_GROUP_RAID0 | +BTRFS_BLOCK_GROUP_RAID1 | +BTRFS_BLOCK_GROUP_DUP | +BTRFS_BLOCK_GROUP_RAID10)) { + stripes_required = map-num_stripes; + } + } + if (multi_ret (rw (REQ_WRITE | REQ_DISCARD)) stripes_allocated stripes_required) { stripes_allocated = map-num_stripes; free_extent_map(em); @@ -3037,12 +3048,15 @@ again: /* stripe_offset is the offset of this block in its stripe*/ stripe_offset = offset - stripe_offset; - if (map-type (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | -BTRFS_BLOCK_GROUP_RAID10 | -BTRFS_BLOCK_GROUP_DUP)) { + if (rw REQ_DISCARD) + *length = min_t(u64, em-len - offset, *length); + else if (map-type (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | + 
BTRFS_BLOCK_GROUP_DUP)) { /* we limit the length of each bio to what fits in a stripe */ *length = min_t(u64, em-len - offset, - map-stripe_len - stripe_offset); + map-stripe_len - stripe_offset); } else { *length = em-len - offset; } @@ -3052,8 +3066,19 @@ again: num_stripes = 1; stripe_index = 0; - if (map-type BTRFS_BLOCK_GROUP_RAID1) { - if (unplug_page || (rw REQ_WRITE)) + stripe_nr_orig = stripe_nr; + stripe_nr_end = (offset + *length + map-stripe_len - 1) + (~(map-stripe_len - 1)); + do_div(stripe_nr_end, map-stripe_len); + stripe_end_offset = stripe_nr_end * map-stripe_len - + (offset + *length); + if (map-type BTRFS_BLOCK_GROUP_RAID0) { + if (rw REQ_DISCARD) + num_stripes = min_t(u64, map-num_stripes, + stripe_nr_end - stripe_nr_orig); + stripe_index = do_div(stripe_nr, map-num_stripes); + } else if (map-type BTRFS_BLOCK_GROUP_RAID1) { + if (unplug_page || (rw (REQ_WRITE | REQ_DISCARD))) num_stripes = map-num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -3064,7 +3089,7 @@ again: } } else if (map-type BTRFS_BLOCK_GROUP_DUP) { - if (rw REQ_WRITE) + if (rw (REQ_WRITE | REQ_DISCARD)) num_stripes = map-num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -3077,6 +3102,10 @@ again: if (unplug_page || (rw REQ_WRITE)) num_stripes = map-sub_stripes; + else if (rw REQ_DISCARD) + num_stripes = min_t(u64, map-sub_stripes * + (stripe_nr_end - stripe_nr_orig), + map-num_stripes); else if (mirror_num) stripe_index += mirror_num - 1; else { @@ -3094,24 +3123,101 @@ again: } BUG_ON(stripe_index = map-num_stripes); - for (i
[PATCH V4 3/4] Btrfs: adjust btrfs_discard_extent() return errors and trimmed bytes
Callers of btrfs_discard_extent() should check if we are mounted with -o discard, as we want to make fitrim to work even the fs is not mounted with -o discard. Also we should use REQ_DISCARD to map the free extent to get a full mapping, last we only return errors if 1. the error is not a EOPNOTSUPP 2. no device supports discard Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/ctree.h |2 +- fs/btrfs/disk-io.c |5 - fs/btrfs/extent-tree.c | 45 ++--- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c84551..94bb772 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2229,7 +2229,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes); + u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 100b07f..98b60b0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2947,7 +2947,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, break; /* opt_discard */ - ret = btrfs_error_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_error_discard_extent(root, start, +end + 1 - start, +NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index caa4254..10e542a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1738,40 +1738,44 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -static void btrfs_issue_discard(struct block_device *bdev, +static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start 9, len 
9, GFP_KERNEL, 0); + return blkdev_issue_discard(bdev, start 9, len 9, GFP_KERNEL, 0); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) + u64 num_bytes, u64 *actual_bytes) { int ret; - u64 map_length = num_bytes; + u64 discarded_bytes = 0; struct btrfs_multi_bio *multi = NULL; - if (!btrfs_test_opt(root, DISCARD)) - return 0; - /* Tell the block device(s) that the sectors can be discarded */ - ret = btrfs_map_block(root-fs_info-mapping_tree, READ, - bytenr, map_length, multi, 0); + ret = btrfs_map_block(root-fs_info-mapping_tree, REQ_DISCARD, + bytenr, num_bytes, multi, 0); if (!ret) { struct btrfs_bio_stripe *stripe = multi-stripes; int i; - if (map_length num_bytes) - map_length = num_bytes; - for (i = 0; i multi-num_stripes; i++, stripe++) { - btrfs_issue_discard(stripe-dev-bdev, - stripe-physical, - map_length); + ret = btrfs_issue_discard(stripe-dev-bdev, + stripe-physical, + stripe-length); + if (!ret) + discarded_bytes += stripe-length; + else if (ret != -EOPNOTSUPP) + break; } kfree(multi); } + if (discarded_bytes ret == -EOPNOTSUPP) + ret = 0; + + if (actual_bytes) + *actual_bytes = discarded_bytes; + return ret; } @@ -4361,7 +4365,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, if (ret) break; - ret = btrfs_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_discard_extent(root, start, + end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); unpin_extent_range(root, start, end); @@ -5410,7 +5416,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root
[PATCH V4 0/4] Btrfs: batched discard support for btrfs
Dear list, This is V4 of batched discard support. Now we get the full mapping of the free space on each device for RAID0/1/10/DUP instead of just a single stripe length. Tested with xfstests 251. Thanks. Changelog V4: *make btrfs_map_block() return full mapping. Changelog V3: *fix style problems. *rebase to 2.6.38-rc7. Changelog V2: *Check if we have devices that support trim before trying to trim the fs, also adjust minlen according to the discard_granularity. *Update reserved extent calculations in btrfs_trim_block_group(). *Call cond_resched() without checking need_resched(). *Use bitmap_clear_bits() and unlink_free_space() instead of btrfs_remove_free_space(), so we won't search for the same extent twice. *Try harder in btrfs_discard_extent(), now we only report an error if it is not EOPNOTSUPP. *Make sure the block group is cached before trimming it, or we'll see an empty caching tree if the block group is not cached. *Minor return value fix in btrfs_discard_block_group(). -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V4 1/4] Btrfs: make update_reserved_bytes() public
Make the function public as we should update the reserved extents calculations after taking out an extent for trimming. Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/ctree.h|2 ++ fs/btrfs/extent-tree.c | 16 +++- 2 files changed, 9 insertions(+), 9 deletions(-) create mode 100644 fs/btrfs/Module.symvers diff --git a/fs/btrfs/Module.symvers b/fs/btrfs/Module.symvers new file mode 100644 index 000..e69de29 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7f78cc7..2c84551 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2157,6 +2157,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b3089b..caa4254 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,8 +36,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, -u64 num_bytes, int reserve, int sinfo); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -4223,8 +4221,8 @@ int btrfs_pin_extent(struct btrfs_root *root, * update size of reserved extents. this function may return -EAGAIN * if 'reserve' is true or 'sinfo' is false. 
*/ -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, -u64 num_bytes, int reserve, int sinfo) +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo) { int ret = 0; if (sinfo) { @@ -4704,10 +4702,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, buf-bflags)); btrfs_add_free_space(cache, buf-start, buf-len); - ret = update_reserved_bytes(cache, buf-len, 0, 0); + ret = btrfs_update_reserved_bytes(cache, buf-len, 0, 0); if (ret == -EAGAIN) { /* block group became read-only */ - update_reserved_bytes(cache, buf-len, 0, 1); + btrfs_update_reserved_bytes(cache, buf-len, 0, 1); goto out; } @@ -5191,7 +5189,7 @@ checks: search_start - offset); BUG_ON(offset search_start); - ret = update_reserved_bytes(block_group, num_bytes, 1, + ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, (data BTRFS_BLOCK_GROUP_DATA)); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); @@ -5415,7 +5413,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) ret = btrfs_discard_extent(root, start, len); btrfs_add_free_space(cache, start, len); - update_reserved_bytes(cache, len, 0, 1); + btrfs_update_reserved_bytes(cache, len, 0, 1); btrfs_put_block_group(cache); return ret; @@ -5614,7 +5612,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, put_caching_control(caching_ctl); } - ret = update_reserved_bytes(block_group, ins-offset, 1, 1); + ret = btrfs_update_reserved_bytes(block_group, ins-offset, 1, 1); BUG_ON(ret); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, -- 1.7.4.1 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH V3] Btrfs: Batched discard support for btrfs
Here is V3, sorry for sending the patch out without checking the style. Tested with xfstests 251. We are still only trimming the first stripe of a free extent on RAID0/1/10/DUP, but I think we are done with FITRIM itself. The problem could be addressed by looping inside btrfs_discard_extent() and trimming the extent stripe by stripe, or by making the free extent be mapped as a whole, and that could be done in another patch. Thanks. Signed-off-by: Li Dongyang lidongy...@novell.com Reviewed-by: David Sterba dste...@suse.cz Reviewed-by: Kurt Garloff garl...@suse.de --- Changelog V3: *fix style problems. *rebase to 2.6.38-rc7. Changelog V2: *Check if we have devices that support trim before trying to trim the fs, also adjust minlen according to the discard_granularity. *Update reserved extent calculations in btrfs_trim_block_group(). *Call cond_resched() without checking need_resched(). *Use bitmap_clear_bits() and unlink_free_space() instead of btrfs_remove_free_space(), so we won't search for the same extent twice. *Try harder in btrfs_discard_extent(), now we only report an error if it is not EOPNOTSUPP. *Make sure the block group is cached before trimming it, or we'll see an empty caching tree if the block group is not cached. *Minor return value fix in btrfs_discard_block_group(). 
--- fs/btrfs/ctree.h|5 ++- fs/btrfs/disk-io.c |5 ++- fs/btrfs/extent-tree.c | 103 +-- fs/btrfs/free-space-cache.c | 92 ++ fs/btrfs/free-space-cache.h |2 + fs/btrfs/ioctl.c| 46 +++ 6 files changed, 227 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6f820fa..9511af9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2148,6 +2148,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -2218,9 +2220,10 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes); + u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1aa8d6..bcb9451 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2947,7 +2947,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, break; /* opt_discard */ - ret = btrfs_error_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_error_discard_extent(root, start, +end + 1 - start, +NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 588ff98..fbd7bd6 100644 
--- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,8 +36,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, -u64 num_bytes, int reserve, int sinfo); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -442,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. */ - if (!trans-transaction-in_commit + if (trans (!trans-transaction-in_commit) (root root != root-fs_info-tree_root)) { spin_lock(cache-lock); if (cache
Re: [PATCH V2] Btrfs: Batched discard support for btrfs
On Friday, February 25, 2011 04:16:27 PM Li Dongyang wrote: Thanks for your comments, here is the updated patch. I've tested it with xfstests 251 (thanks to Lukas), and it looks fine to me. When we call btrfs_map_block() for RAID0/1/10 or DUP, it only returns a single stripe length at most. I'm a bit confused about why we are doing this, and it causes a little trouble for this patch: we just trim the first stripe on each device right now. We can loop in btrfs_discard_extent(), mapping each stripe and trimming it, but I think the ideal way is mapping the full length of the free extent and trimming it all at once. Ideas? Thanks a lot, Li Dongyang Signed-off-by: Li Dongyang lidongy...@novell.com Reviewed-by: David Sterba dste...@suse.cz Reviewed-by: Kurt Garloff garl...@suse.de --- Changelog V2: *Check if we have devices that support trim before trying to trim the fs, also adjust minlen according to the discard_granularity. *Update reserved extent calculations in btrfs_trim_block_group(). *Call cond_resched() without checking need_resched(). *Use bitmap_clear_bits() and unlink_free_space() instead of btrfs_remove_free_space(), so we won't search for the same extent twice. *Try harder in btrfs_discard_extent(), now we only report an error if it is not EOPNOTSUPP. *Make sure the block group is cached before trimming it, or we'll see an empty caching tree if the block group is not cached. *Minor return value fix in btrfs_discard_block_group(). 
--- fs/btrfs/ctree.h|5 ++- fs/btrfs/disk-io.c |5 ++- fs/btrfs/extent-tree.c | 102 +-- fs/btrfs/free-space-cache.c | 92 ++ fs/btrfs/free-space-cache.h |2 + fs/btrfs/ioctl.c| 47 6 files changed, 227 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c98b3a..5cbc05c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2147,6 +2147,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -2217,7 +2219,8 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, -u64 num_bytes); +u64 num_bytes, u64 *actual_bytes); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1aa8d6..bcb9451 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2947,7 +2947,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, break; /* opt_discard */ - ret = btrfs_error_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_error_discard_extent(root, start, + end + 1 - start, + NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f3c96fc..38100c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,8 +36,6 @@ static int 
update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -442,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. */ - if (!trans-transaction-in_commit + if (trans (!trans-transaction-in_commit) (root root != root-fs_info
[PATCH V2] Btrfs: Batched discard support for btrfs
Thanks for your comments, here is the updated patch. I've tested it with xfstests 251(thanks to Lukas), and it looks fine to me. Signed-off-by: Li Dongyang lidongy...@novell.com Reviewed-by: David Sterba dste...@suse.cz Reviewed-by: Kurt Garloff garl...@suse.de --- Changelog V2: *Check if we have devices support trim before trying to trim the fs, also adjust minlen according to the discard_granularity. *Update reserved extent calculations in btrfs_trim_block_group(). *Call cond_resched() without checking need_resched() *Use bitmap_clear_bits() and unlink_free_space() instead of btrfs_remove_free_space(), so we won't search the same extent for twice. *Try harder in btrfs_discard_extent(), now we won't report errors if it's not a EOPNOTSUPP. *make sure the block group is cached before trimming it,or we'll see an empty caching tree if the block group is not cached. *Minor return value fix in btrfs_discard_block_group(). --- fs/btrfs/ctree.h|5 ++- fs/btrfs/disk-io.c |5 ++- fs/btrfs/extent-tree.c | 102 +-- fs/btrfs/free-space-cache.c | 92 ++ fs/btrfs/free-space-cache.h |2 + fs/btrfs/ioctl.c| 47 6 files changed, 227 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c98b3a..5cbc05c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2147,6 +2147,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -2217,7 +2219,8 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); int btrfs_error_discard_extent(struct btrfs_root *root, u64 
bytenr, - u64 num_bytes); + u64 num_bytes, u64 *actual_bytes); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1aa8d6..bcb9451 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2947,7 +2947,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, break; /* opt_discard */ - ret = btrfs_error_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_error_discard_extent(root, start, +end + 1 - start, +NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f3c96fc..38100c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,8 +36,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, -u64 num_bytes, int reserve, int sinfo); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -442,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. 
*/ - if (!trans-transaction-in_commit + if (trans (!trans-transaction-in_commit) (root root != root-fs_info-tree_root)) { spin_lock(cache-lock); if (cache-cached != BTRFS_CACHE_NO) { @@ -1740,24 +1738,22 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -static void btrfs_issue_discard(struct block_device *bdev, +static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start 9, len 9, GFP_KERNEL, 0); + return blkdev_issue_discard(bdev, start 9, len 9, GFP_KERNEL, 0); } static int
[PATCH] Btrfs: make sure we call recalculate_thresholds() in btrfs_remove_free_space()
We should recalculate the thresholds every time we add or free a bitmap; make sure we do this in btrfs_remove_free_space() by calling free_bitmap(). Thanks. Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/free-space-cache.c | 10 +- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a039065..f631817 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1515,12 +1515,12 @@ again: } if (info-bytes == bytes) { - unlink_free_space(block_group, info); - if (info-bitmap) { - kfree(info-bitmap); - block_group-total_bitmaps--; + if (info-bitmap) + free_bitmap(block_group, info); + else { + unlink_free_space(block_group, info); + kfree(info); } - kfree(info); goto out_lock; } -- 1.7.4.1 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: Batched discard support for btrfs
Here is batched discard support for btrfs, several changes were made: btrfs_test_opt(root, DISCARD) is moved from btrfs_discard_extent to callers, as we still want to trim the fs even it's not mounted with -o discard. btrfs_discard_extent now reports errors and actual bytes trimmed to callers, for EOPNOTSUPP, we will try other stripes as an extent could span SSD and other drives, and we won't return error to callers unless we failed with all stripes. And btrfs_discard_extent calls btrfs_map_block with READ, this means we won't get all stripes mapped for RAID1/DUP/RAID10, I think this should be fixed, Thanks. Signed-off-by: Li Dongyang lidongy...@novell.com --- fs/btrfs/ctree.h|3 +- fs/btrfs/disk-io.c |5 ++- fs/btrfs/extent-tree.c | 81 --- fs/btrfs/free-space-cache.c | 79 + fs/btrfs/free-space-cache.h |2 + fs/btrfs/ioctl.c| 24 + 6 files changed, 179 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c98b3a..4486349 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2217,7 +2217,8 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes); + u64 num_bytes, u64 *actual_bytes); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1aa8d6..bcb9451 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2947,7 +2947,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, break; /* opt_discard */ - ret = btrfs_error_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_error_discard_extent(root, start, +end + 1 - start, +NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f3c96fc..7bed32a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1740,22 +1740,20 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -static void btrfs_issue_discard(struct block_device *bdev, +static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start 9, len 9, GFP_KERNEL, 0); + return blkdev_issue_discard(bdev, start 9, len 9, GFP_KERNEL, 0); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) + u64 num_bytes, u64 *actual_bytes) { int ret; u64 map_length = num_bytes; + u64 discarded_bytes = 0; struct btrfs_multi_bio *multi = NULL; - if (!btrfs_test_opt(root, DISCARD)) - return 0; - /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(root-fs_info-mapping_tree, READ, bytenr, map_length, multi, 0); @@ -1767,13 +1765,25 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, map_length = num_bytes; for (i = 0; i multi-num_stripes; i++, stripe++) { - btrfs_issue_discard(stripe-dev-bdev, - stripe-physical, - map_length); + ret = btrfs_issue_discard(stripe-dev-bdev, + stripe-physical, + map_length); + if (!ret) + discarded_bytes += map_length; + else if (ret == -EOPNOTSUPP) + continue; + else + break; } kfree(multi); } + if (discarded_bytes) + ret = 0; + + if (actual_bytes) + *actual_bytes = discarded_bytes; + return ret; } @@ -4353,7 +4363,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, if (ret) break; - ret = btrfs_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt
Re: cp --reflink with Btrfs
Have a look at line 998, ioctl.c, inside btrfs_ioctl_clone(), the src-i_size(the size of the testfile created by touch) is just 0, and this will cause btrfs_ioctl_clone just return -EINVAL. I'm not sure if it makes sense to clone a file which actually doesn't have any data extents. On Wednesday 16 December 2009 07:37:42 Jason White wrote: Josef Bacik jo...@redhat.com wrote: On Sun, Dec 13, 2009 at 12:29:03AM +, Jason White wrote: I am testing a Btrfs root file system with Debian (kernel 2.6.32) under KVM. ja...@vrtl:~$ touch testfile ja...@vrtl:~$ cp --reflink testfile /tmp cp: failed to clone `/tmp/testfile': Invalid argument This is with GNU Coreutils 8.0 taken from debian Sid. Is this a Coreutils issue, a Btrfs problem or something in my local configuration? Try using bcp, if that works then its likely a problem with coreutils. After reporting this to Debian and engaging on follow-up discussion, it turns out that bcp copies the data if the ioctl() call to clone the file fails, as can be seen from the Python code (which I should have read, but didn't...). Unfortunately the ioctl() call is failing both in bcp and in cp --reflink. Here's partial strace output from the latter. cp --reflink testfile testfile2 open(testfile, O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0 open(testfile2, O_WRONLY|O_CREAT|O_EXCL, 0644) = 4 fstat(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0 ioctl(4, 0x40049409, 0x3) = -1 EINVAL (Invalid argument) Kernel 2.6.32 (debian Sid), x86-64 architecture. Suggestions welcome. Debian bug report: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=561225 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: fix compiler warning with tree_search
Fix a compiler warning in tree_search() by initializing prev to NULL. Signed-off-by: Li Dongyang jerry87...@gmail.com --- fs/btrfs/ordered-data.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ab21c29..8877fa0 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -133,7 +133,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, u64 file_offset) { struct rb_root *root = tree-tree; - struct rb_node *prev; + struct rb_node *prev = NULL; struct rb_node *ret; struct btrfs_ordered_extent *entry; -- 1.6.5.3 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH]btrfs: don't remove device on a raid0 array when device number is 2
we should not make a raid0 array go below 2 devices. Signed-off-by: Li Dongyang jerry87...@gmail.com --- fs/btrfs/volumes.c |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 20cbd2e..801b8d0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1150,6 +1150,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) goto out; } + if ((all_avail BTRFS_BLOCK_GROUP_RAID0) + root-fs_info-fs_devices-rw_devices = 2) { + printk(KERN_ERR btrfs: unable to go below two + devices on raid0\n); + ret = -EINVAL; + goto out; + } + if (strcmp(device_path, missing) == 0) { struct list_head *devices; struct btrfs_device *tmp; -- 1.6.5.4 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html