btrfs_read_block_groups() is used to build up the block group cache for all block groups, so it will iterate all block group items in extent tree.
For large filesystem (TB level), it will search for BLOCK_GROUP_ITEM thousands times, which is the most time consuming part of mounting btrfs. So this patch will try to speed it up by: 1) Avoid unnecessary readahead We were using READA_FORWARD to search for block group item. However block group items are in fact scattered across quite a lot of leaves. Doing readahead will just waste our IO (especially important for HDD). In real world case, for a filesystem with 3T used space, it would have about 50K extent tree leaves, but only have 3K block group items. Meaning we need to iterate 16 leaves to meet one block group on average. So readahead won't help but waste slow HDD seeks. 2) Use chunk mapping to locate block group items Since one block group item always has one corresponding chunk item, we could use chunk mapping to get the block group item size. With block group item size, we can do a pinpoint tree search, instead of searching with some uncertain value and do forward search. In some case, like next BLOCK_GROUP_ITEM is in the next leaf of current path, we could save such unnecessary tree block read. Cc: Ellis H. Wilson III <ell...@panasas.com> Signed-off-by: Qu Wenruo <w...@suse.com> --- Since all my TB level storage is all occupied by my NAS, any feedback (especially for the real world mount speed change) is welcome. --- fs/btrfs/extent-tree.c | 88 +++++++++++++++----------------------------------- 1 file changed, 26 insertions(+), 62 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2f4328511ac8..a3faa0cbe056 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9713,60 +9713,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) return ret; } -static int find_first_block_group(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, - struct btrfs_key *key) -{ - struct btrfs_root *root = fs_info->extent_root; - int ret = 0; - struct btrfs_key found_key; - struct extent_buffer *leaf; - int slot; - - ret = btrfs_search_slot(NULL, root, key, path, 0, 0); - if (ret < 0) - goto out; - - while (1) { - slot = path->slots[0]; - leaf = path->nodes[0]; - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto out; - break; - } - btrfs_item_key_to_cpu(leaf, &found_key, slot); - - if (found_key.objectid >= key->objectid && - found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - struct extent_map_tree *em_tree; - struct extent_map *em; - - em_tree = &root->fs_info->mapping_tree.map_tree; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, found_key.objectid, - found_key.offset); - read_unlock(&em_tree->lock); - if (!em) { - btrfs_err(fs_info, - "logical %llu len %llu found bg but no related chunk", - found_key.objectid, found_key.offset); - ret = -ENOENT; - } else { - ret = 0; - } - free_extent_map(em); - goto out; - } - path->slots[0]++; - } -out: - return ret; -} - void btrfs_put_block_group_cache(struct btrfs_fs_info *info) { struct btrfs_block_group_cache *block_group; @@ -9988,12 +9934,15 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) { struct btrfs_path *path; int ret; + struct btrfs_mapping_tree *map_tree = &info->mapping_tree; + struct btrfs_root *extent_root = info->extent_root; struct btrfs_block_group_cache *cache; struct btrfs_space_info *space_info; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; int need_clear = 0; + u64 cur = 0; u64 cache_gen; u64 feature; int mixed; @@ -10001,13 +9950,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) feature = btrfs_super_incompat_flags(info->super_copy); mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS); - key.objectid = 0; - key.offset = 0; - key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = READA_FORWARD; cache_gen = btrfs_super_cache_generation(info->super_copy); if (btrfs_test_opt(info, SPACE_CACHE) && @@ -10017,10 +9962,30 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) need_clear = 1; while (1) { - ret = find_first_block_group(info, path, &key); - if (ret > 0) + struct extent_map *em; + + read_lock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(&map_tree->map_tree, cur, + ((u64)-1) - cur); + read_unlock(&map_tree->map_tree.lock); + if (!em) break; - if (ret != 0) + + key.objectid = em->start; + key.offset = em->len; + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + cur = em->start + em->len; + free_extent_map(em); + + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + if (ret > 0) { + WARN(1, KERN_ERR + "chunk [%llu %llu) doesn't has its block group item\n", + key.objectid, key.objectid + key.offset); + ret = -ENOENT; + goto error; + } + if (ret < 0) goto error; leaf = path->nodes[0]; @@ -10062,7 +10027,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) goto error; } - key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(path); /* -- 2.16.2 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html