Here is V3; sorry for sending the patch out without checking the style.
Tested with xfstests 251. We are still only trimming the first stripe of
a free extent on RAID0/1/10/DUP, but I think we are otherwise done with FITRIM.
That problem could be addressed by looping inside btrfs_discard_extent() and
trimming the extent stripe by stripe, or by mapping the free extent as a whole,
and that could be done in another patch. Thanks.

Signed-off-by: Li Dongyang <lidongy...@novell.com>
Reviewed-by: David Sterba <dste...@suse.cz>
Reviewed-by: Kurt Garloff <garl...@suse.de>
---
Changelog V3:
    *fix style problems.
    *rebase to 2.6.38-rc7.
Changelog V2:
    *Check whether any device supports trim before trying to trim the fs; also
      adjust minlen according to the discard_granularity.
    *Update reserved extent calculations in btrfs_trim_block_group().
    *Call cond_resched() without checking need_resched()
    *Use bitmap_clear_bits() and unlink_free_space() instead of
      btrfs_remove_free_space(), so we won't search for the same extent twice.
    *Try harder in btrfs_discard_extent(): we no longer report an error when
     the failure is -EOPNOTSUPP or when at least one stripe was discarded.
    *Make sure the block group is cached before trimming it; otherwise we'll
     see an empty caching tree.
    *Minor return value fix in btrfs_discard_block_group().
---
 fs/btrfs/ctree.h            |    5 ++-
 fs/btrfs/disk-io.c          |    5 ++-
 fs/btrfs/extent-tree.c      |  103 +++++++++++++++++++++++++++++++++----------
 fs/btrfs/free-space-cache.c |   92 ++++++++++++++++++++++++++++++++++++++
 fs/btrfs/free-space-cache.h |    2 +
 fs/btrfs/ioctl.c            |   46 +++++++++++++++++++
 6 files changed, 227 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f820fa..9511af9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2148,6 +2148,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      u64 root_objectid, u64 owner, u64 offset);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+                               u64 num_bytes, int reserve, int sinfo);
 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2218,9 +2220,10 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
                                   u64 start, u64 end);
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-                              u64 num_bytes);
+                              u64 num_bytes, u64 *actual_bytes);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 type);
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
 
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e1aa8d6..bcb9451 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2947,7 +2947,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
                        break;
 
                /* opt_discard */
-               ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+               if (btrfs_test_opt(root, DISCARD))
+                       ret = btrfs_error_discard_extent(root, start,
+                                                        end + 1 - start,
+                                                        NULL);
 
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
                btrfs_error_unpin_extent_range(root, start, end);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 588ff98..fbd7bd6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -36,8 +36,6 @@
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc);
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                u64 num_bytes, int reserve, int sinfo);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -442,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
         * allocate blocks for the tree root we can't do the fast caching since
         * we likely hold important locks.
         */
-       if (!trans->transaction->in_commit &&
+       if (trans && (!trans->transaction->in_commit) &&
            (root && root != root->fs_info->tree_root)) {
                spin_lock(&cache->lock);
                if (cache->cached != BTRFS_CACHE_NO) {
@@ -1740,24 +1738,22 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static void btrfs_issue_discard(struct block_device *bdev,
+static int btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
 {
-       blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
+       return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-                               u64 num_bytes)
+                               u64 num_bytes, u64 *actual_bytes)
 {
        int ret;
        u64 map_length = num_bytes;
+       u64 discarded_bytes = 0;
        struct btrfs_multi_bio *multi = NULL;
 
-       if (!btrfs_test_opt(root, DISCARD))
-               return 0;
-
        /* Tell the block device(s) that the sectors can be discarded */
-       ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+       ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE,
                              bytenr, &map_length, &multi, 0);
        if (!ret) {
                struct btrfs_bio_stripe *stripe = multi->stripes;
@@ -1767,13 +1763,21 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                        map_length = num_bytes;
 
                for (i = 0; i < multi->num_stripes; i++, stripe++) {
-                       btrfs_issue_discard(stripe->dev->bdev,
-                                           stripe->physical,
-                                           map_length);
+                       ret = btrfs_issue_discard(stripe->dev->bdev,
+                                                 stripe->physical,
+                                                 map_length);
+                       if (!ret)
+                               discarded_bytes += map_length;
                }
                kfree(multi);
        }
 
+       if (discarded_bytes || ret == -EOPNOTSUPP)
+               ret = 0;
+
+       if (actual_bytes)
+               *actual_bytes = discarded_bytes;
+
        return ret;
 }
 
@@ -4214,8 +4218,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
  * update size of reserved extents. this function may return -EAGAIN
  * if 'reserve' is true or 'sinfo' is false.
  */
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                u64 num_bytes, int reserve, int sinfo)
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+                               u64 num_bytes, int reserve, int sinfo)
 {
        int ret = 0;
        if (sinfo) {
@@ -4353,7 +4357,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                if (ret)
                        break;
 
-               ret = btrfs_discard_extent(root, start, end + 1 - start);
+               if (btrfs_test_opt(root, DISCARD))
+                       ret = btrfs_discard_extent(root, start,
+                                                  end + 1 - start, NULL);
 
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
                unpin_extent_range(root, start, end);
@@ -4694,10 +4700,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
                btrfs_add_free_space(cache, buf->start, buf->len);
-               ret = update_reserved_bytes(cache, buf->len, 0, 0);
+               ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
                if (ret == -EAGAIN) {
                        /* block group became read-only */
-                       update_reserved_bytes(cache, buf->len, 0, 1);
+                       btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
                        goto out;
                }
 
@@ -5180,7 +5186,7 @@ checks:
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
-               ret = update_reserved_bytes(block_group, num_bytes, 1,
+               ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
                                            (data & BTRFS_BLOCK_GROUP_DATA));
                if (ret == -EAGAIN) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
@@ -5401,10 +5407,11 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
                return -ENOSPC;
        }
 
-       ret = btrfs_discard_extent(root, start, len);
+       if (btrfs_test_opt(root, DISCARD))
+               ret = btrfs_discard_extent(root, start, len, NULL);
 
        btrfs_add_free_space(cache, start, len);
-       update_reserved_bytes(cache, len, 0, 1);
+       btrfs_update_reserved_bytes(cache, len, 0, 1);
        btrfs_put_block_group(cache);
 
        return ret;
@@ -5603,7 +5610,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                put_caching_control(caching_ctl);
        }
 
-       ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
+       ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
        BUG_ON(ret);
        btrfs_put_block_group(block_group);
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -8719,7 +8726,55 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 }
 
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-                              u64 num_bytes)
+                              u64 num_bytes, u64 *actual_bytes)
 {
-       return btrfs_discard_extent(root, bytenr, num_bytes);
+       return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+}
+
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
+{  /* Trim each block group overlapping [range->start, range->start + range->len); range->len is overwritten with the bytes trimmed. */
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *cache = NULL;
+       u64 group_trimmed;  /* bytes trimmed in the block group currently being processed */
+       u64 start;
+       u64 end;
+       u64 trimmed = 0;  /* running total over all block groups visited */
+       int ret = 0;
+
+       cache = btrfs_lookup_block_group(fs_info, range->start);
+
+       while (cache) {
+               if (cache->key.objectid >= (range->start + range->len)) {  /* group begins at or past the end of the request: stop. NOTE(review): start + len may wrap for a very large len - confirm */
+                       btrfs_put_block_group(cache);
+                       break;
+               }
+
+               start = max(range->start, cache->key.objectid);  /* clamp the request to this block group */
+               end = min(range->start + range->len,
+                               cache->key.objectid + cache->key.offset);
+
+               if (end - start >= range->minlen) {  /* skip groups whose overlap with the request is shorter than minlen */
+                       if (!block_group_cache_done(cache)) {
+                               ret = cache_block_group(cache, NULL, root, 0);  /* NULL is presumably the transaction handle - TODO confirm against cache_block_group() */
+                               if (!ret)
+                                       wait_block_group_cache_done(cache);
+                       }
+                       ret = btrfs_trim_block_group(cache,  /* NOTE(review): this assignment overwrites any error returned by cache_block_group() above */
+                                                    &group_trimmed,
+                                                    start,
+                                                    end,
+                                                    range->minlen);
+
+                       trimmed += group_trimmed;  /* counted even when ret < 0, so partial progress is reported */
+                       if (ret < 0) {
+                               btrfs_put_block_group(cache);
+                               break;
+                       }
+               }
+
+               cache = next_block_group(fs_info->tree_root, cache);  /* presumably releases the reference on the current group - TODO confirm */
+       }
+
+       range->len = trimmed;  /* hand the trimmed byte count back through the range argument */
+       return ret;
 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a039065..d0dc812 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2154,3 +2154,95 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
        cluster->block_group = NULL;
 }
 
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+                          u64 *trimmed, u64 start, u64 end, u64 minlen)
+{  /* Discard free ranges of block_group that lie in [start, end) and are at least minlen bytes; *trimmed receives the bytes discarded. Returns 0 or a negative errno. */
+       struct btrfs_free_space *entry = NULL;
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       u64 bytes = 0;  /* length of the free range picked in the current iteration */
+       u64 actually_trimmed;
+       int ret = 0;
+
+       *trimmed = 0;
+
+       while (start < end) {
+               spin_lock(&block_group->tree_lock);
+
+               if (block_group->free_space < minlen) {  /* total free space in the group is already below minlen: nothing left to trim */
+                       spin_unlock(&block_group->tree_lock);
+                       break;
+               }
+
+               entry = tree_search_offset(block_group, start, 0, 1);
+               if (!entry)  /* first lookup found nothing: retry at the bitmap offset that covers start */
+                       entry = tree_search_offset(block_group,
+                                                  offset_to_bitmap(block_group,
+                                                                   start),
+                                                  1, 1);
+
+               if (!entry || entry->offset >= end) {  /* no entry, or it begins at/after end: done */
+                       spin_unlock(&block_group->tree_lock);
+                       break;
+               }
+
+               if (entry->bitmap) {
+                       ret = search_bitmap(block_group, entry, &start, &bytes);  /* start and bytes are passed by pointer and may be updated */
+                       if (!ret) {
+                               if (start >= end) {
+                                       spin_unlock(&block_group->tree_lock);
+                                       break;
+                               }
+                               bytes = min(bytes, end - start);  /* clip to the requested end */
+                               bitmap_clear_bits(block_group, entry,  /* take the range out of the bitmap before the lock is dropped */
+                                                 start, bytes);
+                               if (entry->bytes == 0)
+                                       free_bitmap(block_group, entry);
+                       } else {
+                               start = entry->offset + BITS_PER_BITMAP *  /* search failed: presumably skips past the span covered by this bitmap - TODO confirm */
+                                       block_group->sectorsize;
+                               spin_unlock(&block_group->tree_lock);
+                               ret = 0;  /* a failed bitmap search is not reported to the caller */
+                               continue;
+                       }
+               } else {
+                       start = entry->offset;
+                       bytes = min(entry->bytes, end - start);
+                       unlink_free_space(block_group, entry);  /* NOTE(review): the whole entry is unlinked and freed even if bytes was clipped to end - start - confirm the tail is not lost */
+                       kfree(entry);
+               }
+
+               spin_unlock(&block_group->tree_lock);
+
+               if (bytes >= minlen) {  /* NOTE(review): shorter ranges were already removed above but are only re-added inside this branch - confirm */
+                       int update_ret;
+                       update_ret = btrfs_update_reserved_bytes(block_group,  /* reserve=1, sinfo=1: account the range as reserved during the discard */
+                                                                bytes, 1, 1);
+
+                       ret = btrfs_error_discard_extent(fs_info->extent_root,
+                                                        start,
+                                                        bytes,
+                                                        &actually_trimmed);
+
+                       btrfs_add_free_space(block_group,  /* the range is returned to free space whether or not the discard succeeded */
+                                            start, bytes);
+                       if (!update_ret)  /* drop the reservation only if taking it succeeded */
+                               btrfs_update_reserved_bytes(block_group,
+                                                           bytes, 0, 1);
+
+                       if (ret)
+                               break;
+                       *trimmed += actually_trimmed;
+               }
+               start += bytes;  /* continue the search after the range just handled */
+               bytes = 0;
+
+               if (fatal_signal_pending(current)) {  /* let a fatal signal abort the loop */
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+
+               cond_resched();
+       }
+
+       return ret;
+}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index e49ca5c..65c3b93 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 int btrfs_return_cluster_to_free_space(
                               struct btrfs_block_group_cache *block_group,
                               struct btrfs_free_cluster *cluster);
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+                          u64 *trimmed, u64 start, u64 end, u64 minlen);
 #endif
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5fdb2ab..eff9228 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -40,6 +40,7 @@
 #include <linux/xattr.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -225,6 +226,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
        return put_user(inode->i_generation, arg);
 }
 
+static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
+{  /* FITRIM handler: checks that some device supports discard, copies the fstrim_range from user space, runs btrfs_trim_fs() and copies the range back. */
+       struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_device *device;
+       struct request_queue *q;
+       struct fstrim_range range;
+       u64 minlen = ULLONG_MAX;  /* lowered to the smallest discard_granularity among discard-capable devices */
+       u64 num_devices = 0;  /* number of devices whose queue reports discard support */
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+               if (!device->bdev)  /* skip devices with no block device attached */
+                       continue;
+               q = bdev_get_queue(device->bdev);
+               if (blk_queue_discard(q)) {
+                       num_devices++;
+                       minlen = min((u64)q->limits.discard_granularity,
+                                    minlen);
+               }
+       }
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+       if (!num_devices)  /* no device supports discard */
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&range, arg, sizeof(range)))
+               return -EFAULT;
+
+       range.minlen = max(range.minlen, minlen);  /* raise the caller's minlen to at least the granularity found above */
+       ret = btrfs_trim_fs(root, &range);
+       if (ret < 0)
+               return ret;
+
+       if (copy_to_user(arg, &range, sizeof(range)))  /* return the updated range to user space */
+               return -EFAULT;
+
+       return 0;
+}
+
 static noinline int create_subvol(struct btrfs_root *root,
                                  struct dentry *dentry,
                                  char *name, int namelen,
@@ -2388,6 +2432,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_setflags(file, argp);
        case FS_IOC_GETVERSION:
                return btrfs_ioctl_getversion(file, argp);
+       case FITRIM:
+               return btrfs_ioctl_fitrim(file, argp);
        case BTRFS_IOC_SNAP_CREATE:
                return btrfs_ioctl_snap_create(file, argp, 0);
        case BTRFS_IOC_SNAP_CREATE_V2:
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to