On 22.02.2018 06:52, Qu Wenruo wrote: > btrfs_read_block_groups() is used to build up the block group cache for > all block groups, so it will iterate all block group items in extent > tree. > > For large filesystem (TB level), it will search for BLOCK_GROUP_ITEM > thousands times, which is the most time consuming part of mounting > btrfs. > > So this patch will try to speed it up by: > > 1) Avoid unnecessary readahead > We were using READA_FORWARD to search for block group item. > However block group items are in fact scattered across quite a lot of > leaves. Doing readahead will just waste our IO (especially important > for HDD). > > In real world case, for a filesystem with 3T used space, it would > have about 50K extent tree leaves, but only have 3K block group > items. Meaning we need to iterate 16 leaves to meet one block group > on average. > > So readahead won't help but waste slow HDD seeks. > > 2) Use chunk mapping to locate block group items > Since one block group item always has one corresponding chunk item, > we could use chunk mapping to get the block group item size. > > With block group item size, we can do a pinpoint tree search, instead > of searching with some uncertain value and do forward search. > > In some case, like next BLOCK_GROUP_ITEM is in the next leaf of > current path, we could save such unnecessary tree block read. > > Cc: Ellis H. Wilson III <ell...@panasas.com> > Signed-off-by: Qu Wenruo <w...@suse.com> > --- > Since all my TB level storage is all occupied by my NAS, any feedback > (especially for the real world mount speed change) is welcome. 
> --- > fs/btrfs/extent-tree.c | 88 > +++++++++++++++----------------------------------- > 1 file changed, 26 insertions(+), 62 deletions(-) > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index 2f4328511ac8..a3faa0cbe056 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -9713,60 +9713,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, > u64 bytenr) > return ret; > } > > -static int find_first_block_group(struct btrfs_fs_info *fs_info, > - struct btrfs_path *path, > - struct btrfs_key *key) > -{ > - struct btrfs_root *root = fs_info->extent_root; > - int ret = 0; > - struct btrfs_key found_key; > - struct extent_buffer *leaf; > - int slot; > - > - ret = btrfs_search_slot(NULL, root, key, path, 0, 0); > - if (ret < 0) > - goto out; > - > - while (1) { > - slot = path->slots[0]; > - leaf = path->nodes[0]; > - if (slot >= btrfs_header_nritems(leaf)) { > - ret = btrfs_next_leaf(root, path); > - if (ret == 0) > - continue; > - if (ret < 0) > - goto out; > - break; > - } > - btrfs_item_key_to_cpu(leaf, &found_key, slot); > - > - if (found_key.objectid >= key->objectid && > - found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { > - struct extent_map_tree *em_tree; > - struct extent_map *em; > - > - em_tree = &root->fs_info->mapping_tree.map_tree; > - read_lock(&em_tree->lock); > - em = lookup_extent_mapping(em_tree, found_key.objectid, > - found_key.offset); > - read_unlock(&em_tree->lock); > - if (!em) { > - btrfs_err(fs_info, > - "logical %llu len %llu found bg but no related chunk", > - found_key.objectid, found_key.offset); > - ret = -ENOENT; > - } else { > - ret = 0; > - } > - free_extent_map(em); > - goto out; > - } > - path->slots[0]++; > - } > -out: > - return ret; > -} > - > void btrfs_put_block_group_cache(struct btrfs_fs_info *info) > { > struct btrfs_block_group_cache *block_group; > @@ -9988,12 +9934,15 @@ int btrfs_read_block_groups(struct btrfs_fs_info > *info) > { > struct btrfs_path *path; > int ret; > + 
struct btrfs_mapping_tree *map_tree = &info->mapping_tree; > + struct btrfs_root *extent_root = info->extent_root; > struct btrfs_block_group_cache *cache; > struct btrfs_space_info *space_info; > struct btrfs_key key; > struct btrfs_key found_key; > struct extent_buffer *leaf; > int need_clear = 0; > + u64 cur = 0; > u64 cache_gen; > u64 feature; > int mixed; > @@ -10001,13 +9950,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info > *info) > feature = btrfs_super_incompat_flags(info->super_copy); > mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS); > > - key.objectid = 0; > - key.offset = 0; > - key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; > path = btrfs_alloc_path(); > if (!path) > return -ENOMEM; > - path->reada = READA_FORWARD; > > cache_gen = btrfs_super_cache_generation(info->super_copy); > if (btrfs_test_opt(info, SPACE_CACHE) && > @@ -10017,10 +9962,30 @@ int btrfs_read_block_groups(struct btrfs_fs_info > *info) > need_clear = 1; > > while (1) { > - ret = find_first_block_group(info, path, &key); > - if (ret > 0) > + struct extent_map *em; > + > + read_lock(&map_tree->map_tree.lock); > + em = lookup_extent_mapping(&map_tree->map_tree, cur, > + ((u64)-1) - cur); > + read_unlock(&map_tree->map_tree.lock); > + if (!em) > break; > - if (ret != 0) > + > + key.objectid = em->start; > + key.offset = em->len; > + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; > + cur = em->start + em->len; > + free_extent_map(em); > + > + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); > + if (ret > 0) { > + WARN(1, KERN_ERR > + "chunk [%llu %llu) doesn't has its block group item\n",
I'd rephrase this to "chunk [%llu %llu) doesn't have matching block group item" > + key.objectid, key.objectid + key.offset); > + ret = -ENOENT; > + goto error; > } Looks good, however when the time for merging comes I'd rather have this code be part of a function named find_block_group or some such. Let's see if this code brings any improvements and then bikeshed on the details. > + if (ret < 0) > goto error; > > leaf = path->nodes[0]; > @@ -10062,7 +10027,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info > *info) > goto error; > } > > - key.objectid = found_key.objectid + found_key.offset; > btrfs_release_path(path); > > /* > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html