On 08/09/2016 03:30 AM, Qu Wenruo wrote: > When balancing data extents, qgroup will leak all its numbers for > relocated data extents. > > The relocation is done in the following steps for data extents: > 1) Create data reloc tree and inode > 2) Copy all data extents to data reloc tree > And commit transaction > 3) Create tree reloc tree (special snapshot) for any related subvolumes > 4) Replace file extents in tree reloc tree with new extents in data reloc > tree > And commit transaction > 5) Merge tree reloc tree with original fs, by swapping tree blocks > > For 1)~4), since tree reloc tree and data reloc tree don't count toward > qgroup, everything is OK. > > But for 5), the swapping of tree blocks will only inform qgroup to track > metadata extents. > > If metadata extents contain file extents, the qgroup numbers for file extents > will get lost, leading to corrupted qgroup accounting. > > The fix is, before the commit transaction of step 5), to manually inform qgroup to > track all file extents in data reloc tree. > At commit transaction time the tree swapping is done, so qgroup > will account for these data extents correctly. > > Cc: Mark Fasheh <mfas...@suse.de> > Reported-by: Mark Fasheh <mfas...@suse.de> > Reported-by: Filipe Manana <fdman...@gmail.com> > Signed-off-by: Qu Wenruo <quwen...@cn.fujitsu.com>
Tested-by: Goldwyn Rodrigues <rgold...@suse.com> > --- > fs/btrfs/relocation.c | 114 > +++++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 108 insertions(+), 6 deletions(-) > > diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c > index b26a5ae..a6ace8a 100644 > --- a/fs/btrfs/relocation.c > +++ b/fs/btrfs/relocation.c > @@ -31,6 +31,7 @@ > #include "async-thread.h" > #include "free-space-cache.h" > #include "inode-map.h" > +#include "qgroup.h" > > /* > * backref_node, mapping_node and tree_block start with this > @@ -3916,6 +3917,95 @@ int prepare_to_relocate(struct reloc_control *rc) > return 0; > } > > +/* > + * Qgroup fixer for data chunk relocation. > + * The data relocation is done in the following steps > + * 1) Copy data extents into data reloc tree > + * 2) Create tree reloc tree(special snapshot) for related subvolumes > + * 3) Modify file extents in tree reloc tree > + * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks > + * > + * The problem is, data and tree reloc tree are not accounted to qgroup, > + * and 4) will only info qgroup to track tree blocks change, not file extents > + * in the tree blocks. > + * > + * The good news is, related data extents are all in data reloc tree, so we > + * only need to info qgroup to track all file extents in data reloc tree > + * before commit trans. > + */ > +static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle > *trans, > + struct reloc_control *rc) > +{ > + struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; > + struct inode *inode = rc->data_inode; > + struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; > + struct btrfs_path *path; > + struct btrfs_key key; > + int ret = 0; > + > + if (!fs_info->quota_enabled) > + return 0; > + > + /* > + * Only for stage where we update data pointers the qgroup fix is > + * valid. > + * For MOVING_DATA stage, we will miss the timing of swapping tree > + * blocks, and won't fix it. 
> + */ > + if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) > + return 0; > + > + path = btrfs_alloc_path(); > + if (!path) > + return -ENOMEM; > + key.objectid = btrfs_ino(inode); > + key.type = BTRFS_EXTENT_DATA_KEY; > + key.offset = 0; > + > + ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); > + if (ret < 0) > + goto out; > + > + lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); > + while (1) { > + struct btrfs_file_extent_item *fi; > + > + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); > + if (key.objectid > btrfs_ino(inode)) > + break; > + if (key.type != BTRFS_EXTENT_DATA_KEY) > + goto next; > + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], > + struct btrfs_file_extent_item); > + if (btrfs_file_extent_type(path->nodes[0], fi) != > + BTRFS_FILE_EXTENT_REG) > + goto next; > + /* > + pr_info("disk bytenr: %llu, num_bytes: %llu\n", > + btrfs_file_extent_disk_bytenr(path->nodes[0], fi), > + btrfs_file_extent_disk_num_bytes(path->nodes[0], fi)); > + */ > + ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, > + btrfs_file_extent_disk_bytenr(path->nodes[0], fi), > + btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), > + GFP_NOFS); > + if (ret < 0) > + break; > +next: > + ret = btrfs_next_item(data_reloc_root, path); > + if (ret < 0) > + break; > + if (ret > 0) { > + ret = 0; > + break; > + } > + } > + unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); > +out: > + btrfs_free_path(path); > + return ret; > +} > + > static noinline_for_stack int relocate_block_group(struct reloc_control *rc) > { > struct rb_root blocks = RB_ROOT; > @@ -4102,10 +4192,16 @@ restart: > > /* get rid of pinned extents */ > trans = btrfs_join_transaction(rc->extent_root); > - if (IS_ERR(trans)) > + if (IS_ERR(trans)) { > err = PTR_ERR(trans); > - else > - btrfs_commit_transaction(trans, rc->extent_root); > + goto out_free; > + } > + err = qgroup_fix_relocated_data_extents(trans, rc); > + if (err < 0) { > + btrfs_abort_transaction(trans, 
err); > + goto out_free; > + } > + btrfs_commit_transaction(trans, rc->extent_root); > out_free: > btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); > btrfs_free_path(path); > @@ -4468,10 +4564,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) > unset_reloc_control(rc); > > trans = btrfs_join_transaction(rc->extent_root); > - if (IS_ERR(trans)) > + if (IS_ERR(trans)) { > err = PTR_ERR(trans); > - else > - err = btrfs_commit_transaction(trans, rc->extent_root); > + goto out_free; > + } > + err = qgroup_fix_relocated_data_extents(trans, rc); > + if (err < 0) { > + btrfs_abort_transaction(trans, err); > + goto out_free; > + } > + err = btrfs_commit_transaction(trans, rc->extent_root); > out_free: > kfree(rc); > out: > -- Goldwyn -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html