On Tue, Nov 30, 2010 at 08:46, Josef Bacik <jo...@redhat.com> wrote:
> Btrfs only allocates chunks as we need them; however, we do not delete chunks
> as we stop using them.  This patch adds this capability.  Whenever we clear
> the last bit of used space in a block group we try to mark it read only, and
> then when the last pinned space is finally removed we queue up the deletion
> work.  I've tested this with xfstests and my enospc tests.  When filling up
> the disk I see that we've allocated the entire disk of chunks, and then when
> I do rm * there is a bunch of space freed up.  Thanks,

Stupid user question:

I have a btrfs filesystem on a 2.6.36 kernel that used to have ~800GB
of data on it.  Then I deleted ~500GB of it (moved it elsewhere), but
the space usage reported by df and the btrfs tool didn't decrease
appreciably.  Might this behavior (chunks never being deleted) be why?
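
(For the record, I was checking space with roughly the following; the
mount point here is just a placeholder:)

  df -h /mnt/btrfs                 # overall usage as the kernel reports it
  btrfs filesystem df /mnt/btrfs   # per-type (data/metadata/system) breakdown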

Thanks,
Josh



> Signed-off-by: Josef Bacik <jo...@redhat.com>
> ---
>  fs/btrfs/ctree.h       |    3 +
>  fs/btrfs/extent-tree.c |  148 ++++++++++++++++++++++++++++++++++++++++++-----
>  fs/btrfs/volumes.c     |   52 +++++++++++------
>  fs/btrfs/volumes.h     |    4 +
>  4 files changed, 174 insertions(+), 33 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 8db9234..50ec64b 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -839,6 +839,9 @@ struct btrfs_block_group_cache {
>         * Today it will only have one thing on it, but that may change
>         */
>        struct list_head cluster_list;
> +
> +       /* Worker for deleting the block group if it's empty */
> +       struct btrfs_work work;
>  };
>
>  struct reloc_control;
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 43aa62a..87aae66 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -64,6 +64,11 @@ static int find_next_key(struct btrfs_path *path, int level,
>                         struct btrfs_key *key);
>  static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
>                            int dump_block_groups);
> +static int btrfs_set_block_group_ro_trans(struct btrfs_trans_handle *trans,
> +                                         struct btrfs_root *root,
> +                                         struct btrfs_block_group_cache
> +                                         *cache);
> +static int set_block_group_ro_lock(struct btrfs_block_group_cache *cache);
>
>  static noinline int
>  block_group_cache_done(struct btrfs_block_group_cache *cache)
> @@ -4052,6 +4057,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
>        u64 old_val;
>        u64 byte_in_group;
>        int factor;
> +       int empty = 0;
>
>        /* block accounting for super block */
>        spin_lock(&info->delalloc_lock);
> @@ -4064,6 +4070,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
>        spin_unlock(&info->delalloc_lock);
>
>        while (total) {
> +               empty = 0;
>                cache = btrfs_lookup_block_group(info, bytenr);
>                if (!cache)
>                        return -1;
> @@ -4096,6 +4103,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
>                old_val = btrfs_block_group_used(&cache->item);
>                num_bytes = min(total, cache->key.offset - byte_in_group);
>                if (alloc) {
> +                       /*
> +                        * We raced with setting the block group read only, so we
> +                        * need to change it back to rw
> +                        */
> +                       if (cache->ro)
> +                               empty = -1;
>                        old_val += num_bytes;
>                        btrfs_set_block_group_used(&cache->item, old_val);
>                        cache->reserved -= num_bytes;
> @@ -4106,6 +4119,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
>                        spin_unlock(&cache->space_info->lock);
>                } else {
>                        old_val -= num_bytes;
> +                       if (old_val == 0)
> +                               empty = 1;
>                        btrfs_set_block_group_used(&cache->item, old_val);
>                        cache->pinned += num_bytes;
>                        cache->space_info->bytes_pinned += num_bytes;
> @@ -4118,6 +4133,29 @@ static int update_block_group(struct btrfs_trans_handle *trans,
>                                         bytenr, bytenr + num_bytes - 1,
>                                         GFP_NOFS | __GFP_NOFAIL);
>                }
> +               /*
> +                * So we need to deal with 2 cases here
> +                *
> +                * 1) empty == 1, which means the block group is empty and
> +                * needs to be marked ro so we can remove it later
> +                *
> +                * -or-
> +                *
> +                * 2) empty == -1, which means the block group was previously
> +                * empty and marked read only, but not before somebody tried to
> +                * make an allocation, so go ahead and mark it rw.
> +                */
> +               switch (empty) {
> +               case -1:
> +                       btrfs_set_block_group_rw(root, cache);
> +                       break;
> +               case 1:
> +                       btrfs_set_block_group_ro_trans(trans, root, cache);
> +                       break;
> +               default:
> +                       break;
> +               }
> +
>                btrfs_put_block_group(cache);
>                total -= num_bytes;
>                bytenr += num_bytes;
> @@ -4288,6 +4326,17 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
>                        cache->reserved_pinned -= len;
>                        cache->space_info->bytes_reserved += len;
>                }
> +
> +               if (btrfs_block_group_used(&cache->item) == 0 &&
> +                   cache->pinned == 0) {
> +                       int ret = 0;
> +
> +                       if (!cache->ro)
> +                               ret = set_block_group_ro_lock(cache);
> +                       if (!ret)
> +                               btrfs_queue_worker(&fs_info->generic_worker,
> +                                                  &cache->work);
> +               }
>                spin_unlock(&cache->lock);
>                spin_unlock(&cache->space_info->lock);
>        }
> @@ -7905,7 +7954,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
>        return flags;
>  }
>
> -static int set_block_group_ro(struct btrfs_block_group_cache *cache)
> +static int set_block_group_ro_lock(struct btrfs_block_group_cache *cache)
>  {
>        struct btrfs_space_info *sinfo = cache->space_info;
>        u64 num_bytes;
> @@ -7914,8 +7963,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
>        if (cache->ro)
>                return 0;
>
> -       spin_lock(&sinfo->lock);
> -       spin_lock(&cache->lock);
>        num_bytes = cache->key.offset - cache->reserved - cache->pinned -
>                    cache->bytes_super - btrfs_block_group_used(&cache->item);
>
> @@ -7928,37 +7975,67 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
>                cache->ro = 1;
>                ret = 0;
>        }
> +
> +       return ret;
> +}
> +
> +static int set_block_group_ro(struct btrfs_block_group_cache *cache)
> +{
> +       struct btrfs_space_info *sinfo = cache->space_info;
> +       int ret;
> +
> +       spin_lock(&sinfo->lock);
> +       spin_lock(&cache->lock);
> +       ret = set_block_group_ro_lock(cache);
>        spin_unlock(&cache->lock);
>        spin_unlock(&sinfo->lock);
> +
>        return ret;
>  }
>
> -int btrfs_set_block_group_ro(struct btrfs_root *root,
> -                            struct btrfs_block_group_cache *cache)
> -
> +static int btrfs_set_block_group_ro_trans(struct btrfs_trans_handle *trans,
> +                                         struct btrfs_root *root,
> +                                         struct btrfs_block_group_cache
> +                                         *cache)
>  {
> -       struct btrfs_trans_handle *trans;
>        u64 alloc_flags;
>        int ret;
> +       bool alloc = true;
>
> -       BUG_ON(cache->ro);
> +       /*
> +        * If we're trying to set the block group as read only in a transaction
> +        * commit then avoid doing the chunk alloc to make lockdep happy.
> +        */
> +       if (trans->transaction->in_commit)
> +               alloc = false;
>
> -       trans = btrfs_join_transaction(root, 1);
> -       BUG_ON(IS_ERR(trans));
> +       if (cache->ro)
> +               return 0;
>
>        alloc_flags = update_block_group_flags(root, cache->flags);
> -       if (alloc_flags != cache->flags)
> +       if (alloc && alloc_flags != cache->flags)
>                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
>
>        ret = set_block_group_ro(cache);
> -       if (!ret)
> -               goto out;
> +       if (!ret || !alloc)
> +               return ret;
>        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
>        ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
>        if (ret < 0)
> -               goto out;
> -       ret = set_block_group_ro(cache);
> -out:
> +               return ret;
> +       return set_block_group_ro(cache);
> +}
> +
> +int btrfs_set_block_group_ro(struct btrfs_root *root,
> +                            struct btrfs_block_group_cache *cache)
> +{
> +       struct btrfs_trans_handle *trans;
> +       int ret;
> +
> +       trans = btrfs_join_transaction(root, 0);
> +       if (IS_ERR(trans))
> +               return PTR_ERR(trans);
> +       ret = btrfs_set_block_group_ro_trans(trans, root, cache);
>        btrfs_end_transaction(trans, root);
>        return ret;
>  }
> @@ -8206,6 +8283,43 @@ static void __link_block_group(struct btrfs_space_info *space_info,
>        up_write(&space_info->groups_sem);
>  }
>
> +static void block_group_delete_fn(struct btrfs_work *work)
> +{
> +       struct btrfs_block_group_cache *cache;
> +       struct btrfs_fs_info *info;
> +       struct btrfs_trans_handle *trans;
> +       struct btrfs_root *root;
> +       u64 chunk_tree;
> +       u64 chunk_objectid;
> +       int ret;
> +
> +       /*
> +        * If anything fails in here, just mark the block group as rw and
> +        * return.
> +        */
> +       cache = container_of(work, struct btrfs_block_group_cache, work);
> +       info = cache->fs_info;
> +       root = info->extent_root;
> +       chunk_tree = info->chunk_root->root_key.objectid;
> +       chunk_objectid = btrfs_block_group_chunk_objectid(&cache->item);
> +
> +       if (!cache->ro) {
> +               WARN_ON_ONCE(1);
> +               return;
> +       }
> +
> +       trans = btrfs_start_transaction(info->extent_root, 0);
> +       if (IS_ERR(trans)) {
> +               btrfs_set_block_group_rw(root, cache);
> +               return;
> +       }
> +       ret = btrfs_remove_chunk(trans, root, chunk_tree, chunk_objectid,
> +                                cache->key.objectid);
> +       if (ret)
> +               btrfs_set_block_group_rw(root, cache);
> +       btrfs_end_transaction(trans, root);
> +}
> +
>  int btrfs_read_block_groups(struct btrfs_root *root)
>  {
>        struct btrfs_path *path;
> @@ -8257,6 +8371,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
>                cache->fs_info = info;
>                INIT_LIST_HEAD(&cache->list);
>                INIT_LIST_HEAD(&cache->cluster_list);
> +               cache->work.func = block_group_delete_fn;
>
>                if (need_clear)
>                        cache->disk_cache_state = BTRFS_DC_CLEAR;
> @@ -8379,6 +8494,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
>        spin_lock_init(&cache->tree_lock);
>        INIT_LIST_HEAD(&cache->list);
>        INIT_LIST_HEAD(&cache->cluster_list);
> +       cache->work.func = block_group_delete_fn;
>
>        btrfs_set_block_group_used(&cache->item, bytes_used);
>        btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index cc04dc1..49c055b 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1726,13 +1726,13 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
>        return ret;
>  }
>
> -static int btrfs_relocate_chunk(struct btrfs_root *root,
> -                        u64 chunk_tree, u64 chunk_objectid,
> -                        u64 chunk_offset)
> +int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
> +                      struct btrfs_root *root,
> +                      u64 chunk_tree, u64 chunk_objectid,
> +                      u64 chunk_offset)
>  {
>        struct extent_map_tree *em_tree;
>        struct btrfs_root *extent_root;
> -       struct btrfs_trans_handle *trans;
>        struct extent_map *em;
>        struct map_lookup *map;
>        int ret;
> @@ -1742,18 +1742,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
>        extent_root = root->fs_info->extent_root;
>        em_tree = &root->fs_info->mapping_tree.map_tree;
>
> -       ret = btrfs_can_relocate(extent_root, chunk_offset);
> -       if (ret)
> -               return -ENOSPC;
> -
> -       /* step one, relocate all the extents inside this chunk */
> -       ret = btrfs_relocate_block_group(extent_root, chunk_offset);
> -       if (ret)
> -               return ret;
> -
> -       trans = btrfs_start_transaction(root, 0);
> -       BUG_ON(!trans);
> -
>        lock_chunks(root);
>
>        /*
> @@ -1804,10 +1792,40 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
>        free_extent_map(em);
>
>        unlock_chunks(root);
> -       btrfs_end_transaction(trans, root);
>        return 0;
>  }
>
> +static int btrfs_relocate_chunk(struct btrfs_root *root,
> +                        u64 chunk_tree, u64 chunk_objectid,
> +                        u64 chunk_offset)
> +{
> +       struct btrfs_root *extent_root;
> +       struct btrfs_trans_handle *trans;
> +       int ret;
> +
> +       root = root->fs_info->chunk_root;
> +       extent_root = root->fs_info->extent_root;
> +
> +       ret = btrfs_can_relocate(extent_root, chunk_offset);
> +       if (ret)
> +               return -ENOSPC;
> +
> +       /* step one, relocate all the extents inside this chunk */
> +       ret = btrfs_relocate_block_group(extent_root, chunk_offset);
> +       if (ret)
> +               return ret;
> +
> +       trans = btrfs_start_transaction(root, 0);
> +       BUG_ON(!trans);
> +
> +       ret = btrfs_remove_chunk(trans, root, chunk_tree, chunk_objectid,
> +                                chunk_offset);
> +
> +       btrfs_end_transaction(trans, root);
> +
> +       return ret;
> +}
> +
>  static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
>  {
>        struct btrfs_root *chunk_root = root->fs_info->chunk_root;
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 2b638b6..4917cc0 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -183,4 +183,8 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
>  int find_free_dev_extent(struct btrfs_trans_handle *trans,
>                         struct btrfs_device *device, u64 num_bytes,
>                         u64 *start, u64 *max_avail);
> +int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
> +                      struct btrfs_root *root,
> +                      u64 chunk_tree, u64 chunk_objectid,
> +                      u64 chunk_offset);
>  #endif
> --
> 1.6.6.1
>
