Reviewed-and-Tested-by: Goldwyn Rodrigues <rgold...@suse.com>

On 10/17/2016 08:31 PM, Qu Wenruo wrote:
> Commit 62b99540a1d91e464 (btrfs: relocation: Fix leaking qgroups numbers
> on data extents) only fixes the problem partly.
> 
> The previous fix is to trace all new data extents at transaction commit
> time when balance finishes.
> 
> However balance is not done in a large transaction, every path
> replacement can happen in its own transaction.
> This makes the fix useless if transaction commits during relocation.
> 
> For example:
> relocate_block_group()
> |-merge_reloc_roots()
> |  |- merge_reloc_root()
> |     |- btrfs_start_transaction()         <- Trans X
> |     |- replace_path()                    <- Cause leak
> |     |- btrfs_end_transaction_throttle()  <- Trans X commits here
> |     |                                       Leak not fixed
> |     |
> |     |- btrfs_start_transaction()         <- Trans Y
> |     |- replace_path()                    <- Cause leak
> |     |- btrfs_end_transaction_throttle()  <- Trans Y ends
> |                                             but not committed
> |-btrfs_join_transaction()                 <- Still trans Y
> |-qgroup_fix()                             <- Only fixes data leak
> |                                             in trans Y
> |-btrfs_commit_transaction()               <- Trans Y commits
> 
> In that case, qgroup fixup can only fix data leak in trans Y, data leak
> in trans X is out of fix.
> 
> So the correct fix should happens in the same transaction of
> replace_path().
> 
> This patch fixes it by tracing both subtrees of tree block swap, so it
> can fix the problem and ensure all leaking and fix are in the same
> transaction, so no leak again.
> 
> Reported-by: Goldwyn Rodrigues <rgold...@suse.com>
> Signed-off-by: Qu Wenruo <quwen...@cn.fujitsu.com>
> ---
>  fs/btrfs/relocation.c | 119 
> ++++++++++----------------------------------------
>  1 file changed, 23 insertions(+), 96 deletions(-)
> 
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 33cde30..540f225 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -1901,6 +1901,29 @@ again:
>               BUG_ON(ret);
>  
>               /*
> +              * Info qgroup to trace both subtrees.
> +              *
> +              * We must trace both trees.
> +              * 1) Tree reloc subtree
> +              *    If not traced, we will leak data numbers
> +              * 2) Fs subtree
> +              *    If not traced, we will double count old data
> +              *    and tree block numbers, if current trans doesn't free
> +              *    data reloc tree inode.
> +              */
> +             ret = btrfs_qgroup_trace_subtree(trans, src, parent,
> +                             btrfs_header_generation(parent),
> +                             btrfs_header_level(parent));
> +             if (ret < 0)
> +                     break;
> +             ret = btrfs_qgroup_trace_subtree(trans, dest,
> +                             path->nodes[level],
> +                             btrfs_header_generation(path->nodes[level]),
> +                             btrfs_header_level(path->nodes[level]));
> +             if (ret < 0)
> +                     break;
> +
> +             /*
>                * swap blocks in fs tree and reloc tree.
>                */
>               btrfs_set_node_blockptr(parent, slot, new_bytenr);
> @@ -3942,90 +3965,6 @@ int prepare_to_relocate(struct reloc_control *rc)
>       return 0;
>  }
>  
> -/*
> - * Qgroup fixer for data chunk relocation.
> - * The data relocation is done in the following steps
> - * 1) Copy data extents into data reloc tree
> - * 2) Create tree reloc tree(special snapshot) for related subvolumes
> - * 3) Modify file extents in tree reloc tree
> - * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
> - *
> - * The problem is, data and tree reloc tree are not accounted to qgroup,
> - * and 4) will only info qgroup to track tree blocks change, not file extents
> - * in the tree blocks.
> - *
> - * The good news is, related data extents are all in data reloc tree, so we
> - * only need to info qgroup to track all file extents in data reloc tree
> - * before commit trans.
> - */
> -static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle 
> *trans,
> -                                          struct reloc_control *rc)
> -{
> -     struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
> -     struct inode *inode = rc->data_inode;
> -     struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
> -     struct btrfs_path *path;
> -     struct btrfs_key key;
> -     int ret = 0;
> -
> -     if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
> -             return 0;
> -
> -     /*
> -      * Only for stage where we update data pointers the qgroup fix is
> -      * valid.
> -      * For MOVING_DATA stage, we will miss the timing of swapping tree
> -      * blocks, and won't fix it.
> -      */
> -     if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
> -             return 0;
> -
> -     path = btrfs_alloc_path();
> -     if (!path)
> -             return -ENOMEM;
> -     key.objectid = btrfs_ino(inode);
> -     key.type = BTRFS_EXTENT_DATA_KEY;
> -     key.offset = 0;
> -
> -     ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
> -     if (ret < 0)
> -             goto out;
> -
> -     lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
> -     while (1) {
> -             struct btrfs_file_extent_item *fi;
> -
> -             btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
> -             if (key.objectid > btrfs_ino(inode))
> -                     break;
> -             if (key.type != BTRFS_EXTENT_DATA_KEY)
> -                     goto next;
> -             fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
> -                                 struct btrfs_file_extent_item);
> -             if (btrfs_file_extent_type(path->nodes[0], fi) !=
> -                             BTRFS_FILE_EXTENT_REG)
> -                     goto next;
> -             ret = btrfs_qgroup_trace_extent(trans, fs_info,
> -                     btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
> -                     btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
> -                     GFP_NOFS);
> -             if (ret < 0)
> -                     break;
> -next:
> -             ret = btrfs_next_item(data_reloc_root, path);
> -             if (ret < 0)
> -                     break;
> -             if (ret > 0) {
> -                     ret = 0;
> -                     break;
> -             }
> -     }
> -     unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
> -out:
> -     btrfs_free_path(path);
> -     return ret;
> -}
> -
>  static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
>  {
>       struct rb_root blocks = RB_ROOT;
> @@ -4216,13 +4155,6 @@ restart:
>               err = PTR_ERR(trans);
>               goto out_free;
>       }
> -     ret = qgroup_fix_relocated_data_extents(trans, rc);
> -     if (ret < 0) {
> -             btrfs_abort_transaction(trans, ret);
> -             if (!err)
> -                     err = ret;
> -             goto out_free;
> -     }
>       btrfs_commit_transaction(trans, rc->extent_root);
>  out_free:
>       btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
> @@ -4591,11 +4523,6 @@ int btrfs_recover_relocation(struct btrfs_root *root)
>               err = PTR_ERR(trans);
>               goto out_free;
>       }
> -     err = qgroup_fix_relocated_data_extents(trans, rc);
> -     if (err < 0) {
> -             btrfs_abort_transaction(trans, err);
> -             goto out_free;
> -     }
>       err = btrfs_commit_transaction(trans, rc->extent_root);
>  out_free:
>       kfree(rc);
> 

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to