On Fri, Oct 31, 2014 at 09:49:34AM -0400, Josef Bacik wrote:
> Our gluster boxes were spending lots of time in statfs because our fs'es are
> huge.  The problem is that statfs loops through all of the block groups
> looking for read only block groups, and when you have several terabytes
> worth of data that ends up being a lot of block groups.  Move the read only
> block groups onto a read only list and only process that list in
> btrfs_account_ro_block_groups_free_space to reduce the amount of churn.
> Thanks,
Looks good.

Reviewed-by: Liu Bo <bo.li....@oracle.com>

-liubo

>
> Signed-off-by: Josef Bacik <jba...@fb.com>
> ---
> V1->V2:
> -list_for_each_entry was using the wrong ->member name.
>
>  fs/btrfs/ctree.h       |  4 ++++
>  fs/btrfs/extent-tree.c | 36 +++++++++++++-----------------------
>  2 files changed, 17 insertions(+), 23 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index d557264e..438f087 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1170,6 +1170,7 @@ struct btrfs_space_info {
>  	struct percpu_counter total_bytes_pinned;
>
>  	struct list_head list;
> +	struct list_head ro_bgs;
>
>  	struct rw_semaphore groups_sem;
>  	/* for block groups in our same type */
> @@ -1305,6 +1306,9 @@ struct btrfs_block_group_cache {
>
>  	/* For delayed block group creation or deletion of empty block groups */
>  	struct list_head bg_list;
> +
> +	/* For read-only block groups */
> +	struct list_head ro_list;
>  };
>
>  /* delayed seq elem */
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 0d599ba..f51004f 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -3518,6 +3518,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
>  	found->chunk_alloc = 0;
>  	found->flush = 0;
>  	init_waitqueue_head(&found->wait);
> +	INIT_LIST_HEAD(&found->ro_bgs);
>
>  	ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
>  				    info->space_info_kobj, "%s",
> @@ -8525,6 +8526,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
>  	    min_allocable_bytes <= sinfo->total_bytes) {
>  		sinfo->bytes_readonly += num_bytes;
>  		cache->ro = 1;
> +		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
>  		ret = 0;
>  	}
>  out:
> @@ -8579,15 +8581,20 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
>
>  /*
>   * helper to account the unused space of all the readonly block group in the
> - * list. takes mirrors into account.
> + * space_info. takes mirrors into account.
>   */
> -static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
> +u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
>  {
>  	struct btrfs_block_group_cache *block_group;
>  	u64 free_bytes = 0;
>  	int factor;
>
> -	list_for_each_entry(block_group, groups_list, list) {
> +	/* It's df, we don't care if it's racey */
> +	if (list_empty(&sinfo->ro_bgs))
> +		return 0;
> +
> +	spin_lock(&sinfo->lock);
> +	list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
>  		spin_lock(&block_group->lock);
>
>  		if (!block_group->ro) {
> @@ -8608,26 +8615,6 @@ static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
>
>  		spin_unlock(&block_group->lock);
>  	}
> -
> -	return free_bytes;
> -}
> -
> -/*
> - * helper to account the unused space of all the readonly block group in the
> - * space_info. takes mirrors into account.
> - */
> -u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
> -{
> -	int i;
> -	u64 free_bytes = 0;
> -
> -	spin_lock(&sinfo->lock);
> -
> -	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
> -		if (!list_empty(&sinfo->block_groups[i]))
> -			free_bytes += __btrfs_get_ro_block_group_free_space(
> -						&sinfo->block_groups[i]);
> -
>  	spin_unlock(&sinfo->lock);
>
>  	return free_bytes;
> @@ -8647,6 +8634,7 @@ void btrfs_set_block_group_rw(struct btrfs_root *root,
>  		  cache->bytes_super - btrfs_block_group_used(&cache->item);
>  	sinfo->bytes_readonly -= num_bytes;
>  	cache->ro = 0;
> +	list_del_init(&cache->ro_list);
>  	spin_unlock(&cache->lock);
>  	spin_unlock(&sinfo->lock);
>  }
> @@ -9016,6 +9004,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
>  	INIT_LIST_HEAD(&cache->list);
>  	INIT_LIST_HEAD(&cache->cluster_list);
>  	INIT_LIST_HEAD(&cache->bg_list);
> +	INIT_LIST_HEAD(&cache->ro_list);
>  	btrfs_init_free_space_ctl(cache);
>
>  	return cache;
> @@ -9425,6 +9414,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
>  	 * are still on the list after taking the semaphore
>  	 */
>  	list_del_init(&block_group->list);
> +	list_del_init(&block_group->ro_list);
>  	if (list_empty(&block_group->space_info->block_groups[index])) {
>  		kobj = block_group->space_info->block_group_kobjs[index];
>  		block_group->space_info->block_group_kobjs[index] = NULL;
> --
> 1.8.3.1
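
[Editor's note] To make the shape of the change easier to see outside the kernel, below is a minimal userspace sketch of the same idea, assuming nothing about the btrfs API: the struct and function names are illustrative, and the locking and the RAID mirror factor that the real patch handles are omitted. The point it demonstrates is the one from the commit message: statfs-style accounting that scans every block group does work proportional to the whole filesystem, while keeping read-only block groups on their own list makes the walk proportional to the (usually tiny) read-only set.

/*
 * Toy model of "move read only block groups onto their own list".
 * Illustrative names only; this is not the kernel code.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct block_group {
	uint64_t free_bytes;
	int ro;
	struct block_group *ro_next;	/* link on the read-only list */
};

struct space_info {
	struct block_group *groups;	/* all block groups */
	size_t nr_groups;
	struct block_group *ro_bgs;	/* head of the read-only list */
};

/* Old approach: scan every block group and skip the writable ones. */
static uint64_t account_ro_free_scan_all(const struct space_info *sinfo)
{
	uint64_t free_bytes = 0;

	for (size_t i = 0; i < sinfo->nr_groups; i++)
		if (sinfo->groups[i].ro)
			free_bytes += sinfo->groups[i].free_bytes;
	return free_bytes;
}

/* New approach: walk only the (usually short) read-only list. */
static uint64_t account_ro_free_ro_list(const struct space_info *sinfo)
{
	uint64_t free_bytes = 0;

	for (struct block_group *bg = sinfo->ro_bgs; bg; bg = bg->ro_next)
		free_bytes += bg->free_bytes;
	return free_bytes;
}

/* Mark a block group read-only and add it to the dedicated list. */
static void mark_block_group_ro(struct space_info *sinfo, struct block_group *bg)
{
	bg->ro = 1;
	bg->ro_next = sinfo->ro_bgs;
	sinfo->ro_bgs = bg;
}

int main(void)
{
	enum { NR = 100000 };		/* a large filesystem's worth of block groups */
	struct space_info sinfo = {
		.groups = calloc(NR, sizeof(struct block_group)),
		.nr_groups = NR,
		.ro_bgs = NULL,
	};

	for (size_t i = 0; i < NR; i++)
		sinfo.groups[i].free_bytes = 1024;

	/* Only a handful of groups ever become read-only. */
	mark_block_group_ro(&sinfo, &sinfo.groups[7]);
	mark_block_group_ro(&sinfo, &sinfo.groups[42]);

	/* Both walks report the same total; the second touches 2 nodes, not 100000. */
	printf("scan all: %llu bytes\n",
	       (unsigned long long)account_ro_free_scan_all(&sinfo));
	printf("ro list : %llu bytes\n",
	       (unsigned long long)account_ro_free_ro_list(&sinfo));

	free(sinfo.groups);
	return 0;
}

The actual patch additionally multiplies each group's unused space by a mirror factor and takes sinfo->lock and each block group's lock while walking, which the toy leaves out for brevity.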