On Mon, Oct 07, 2019 at 04:17:44PM -0400, Dennis Zhou wrote:
> Non-block group destruction discarding currently has only a single list
> with no minimum discard length. This can lead to caravanning more
> meaningful discards behind a heavily fragmented block group.
>
> This adds support for multiple lists with minimum discard lengths to
> prevent the caravan effect. We promote block groups back up when we
> exceed the BTRFS_DISCARD_MAX_FILTER size. Currently we support only 2
> lists, with filters of 1MB and 32KB respectively.
>
> Signed-off-by: Dennis Zhou <den...@kernel.org>
> ---
>  fs/btrfs/ctree.h            |  2 +-
>  fs/btrfs/discard.c          | 60 +++++++++++++++++++++++++++++++++----
>  fs/btrfs/discard.h          |  4 +++
>  fs/btrfs/free-space-cache.c | 37 +++++++++++++++--------
>  fs/btrfs/free-space-cache.h |  2 +-
>  5 files changed, 85 insertions(+), 20 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index e81f699347e0..b5608f8dc41a 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -439,7 +439,7 @@ struct btrfs_full_stripe_locks_tree {
>  };
>
>  /* discard control */
> -#define BTRFS_NR_DISCARD_LISTS		2
> +#define BTRFS_NR_DISCARD_LISTS		3
>
>  struct btrfs_discard_ctl {
> 	struct workqueue_struct *discard_workers;
> diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
> index 072c73f48297..296cbffc5957 100644
> --- a/fs/btrfs/discard.c
> +++ b/fs/btrfs/discard.c
> @@ -20,6 +20,10 @@
>  #define BTRFS_DISCARD_MAX_DELAY		(10000UL)
>  #define BTRFS_DISCARD_MAX_IOPS		(10UL)
>
> +/* monotonically decreasing filters after 0 */
> +static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {0,
> +	BTRFS_DISCARD_MAX_FILTER, BTRFS_DISCARD_MIN_FILTER};
> +
>  static struct list_head *
>  btrfs_get_discard_list(struct btrfs_discard_ctl *discard_ctl,
> 		       struct btrfs_block_group_cache *cache)
> @@ -120,7 +124,7 @@ find_next_cache(struct btrfs_discard_ctl *discard_ctl, u64 now)
>  }
>
>  static struct btrfs_block_group_cache *
> -peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
> +peek_discard_list(struct btrfs_discard_ctl *discard_ctl, int *discard_index)
>  {
> 	struct btrfs_block_group_cache *cache;
> 	u64 now = ktime_get_ns();
> @@ -132,6 +136,7 @@ peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
>
> 	if (cache && now > cache->discard_delay) {
> 		discard_ctl->cache = cache;
> +		*discard_index = cache->discard_index;
> 		if (cache->discard_index == 0 &&
> 		    cache->free_space_ctl->free_space != cache->key.offset) {
> 			__btrfs_add_to_discard_list(discard_ctl, cache);
> @@ -150,6 +155,36 @@ peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
> 	return cache;
>  }
>
> +void btrfs_discard_check_filter(struct btrfs_block_group_cache *cache,
> +				u64 bytes)
> +{
> +	struct btrfs_discard_ctl *discard_ctl;
> +
> +	if (!cache || !btrfs_test_opt(cache->fs_info, DISCARD_ASYNC))
> +		return;
> +
> +	discard_ctl = &cache->fs_info->discard_ctl;
> +
> +	if (cache && cache->discard_index > 1 &&
> +	    bytes >= BTRFS_DISCARD_MAX_FILTER) {
> +		remove_from_discard_list(discard_ctl, cache);
> +		cache->discard_index = 1;

Really need named constants here; I have no idea what 1 means.

> +		btrfs_add_to_discard_list(discard_ctl, cache);
> +	}
> +}
> +
> +static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
> +				       struct btrfs_block_group_cache *cache)
> +{
> +	cache->discard_index++;
> +	if (cache->discard_index == BTRFS_NR_DISCARD_LISTS) {
> +		cache->discard_index = 1;
> +		return;
> +	}
> +
> +	btrfs_add_to_discard_list(discard_ctl, cache);
> +}
> +
>  void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
> 			       struct btrfs_block_group_cache *cache)
>  {
> @@ -202,23 +237,34 @@ static void btrfs_discard_workfn(struct work_struct *work)
>  {
> 	struct btrfs_discard_ctl *discard_ctl;
> 	struct btrfs_block_group_cache *cache;
> +	int discard_index = 0;
> 	u64 trimmed = 0;
> +	u64 minlen = 0;
>
> 	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
>
>  again:
> -	cache = peek_discard_list(discard_ctl);
> +	cache = peek_discard_list(discard_ctl, &discard_index);
> 	if (!cache || !btrfs_run_discard_work(discard_ctl))
> 		return;
>
> -	if (btrfs_discard_bitmaps(cache))
> +	minlen = discard_minlen[discard_index];
> +
> +	if (btrfs_discard_bitmaps(cache)) {
> +		u64 maxlen = 0;
> +
> +		if (discard_index)
> +			maxlen = discard_minlen[discard_index - 1];
> +
> 		btrfs_trim_block_group_bitmaps(cache, &trimmed,
> 					       cache->discard_cursor,
> 					       btrfs_block_group_end(cache),
> -					       0, true);
> -	else
> +					       minlen, maxlen, true);
> +	} else {
> 		btrfs_trim_block_group(cache, &trimmed, cache->discard_cursor,
> -				       btrfs_block_group_end(cache), 0, true);
> +				       btrfs_block_group_end(cache),
> +				       minlen, true);
> +	}
>
> 	discard_ctl->prev_discard = trimmed;
>
> @@ -231,6 +277,8 @@ static void btrfs_discard_workfn(struct work_struct *work)
> 				    cache->key.offset)
> 				btrfs_add_to_discard_free_list(discard_ctl,
> 							       cache);
> +			else
> +				btrfs_update_discard_index(discard_ctl, cache);
> 		} else {
> 			cache->discard_cursor = cache->key.objectid;
> 			cache->discard_flags |= BTRFS_DISCARD_BITMAPS;
> diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h
> index 898dd92dbf8f..1daa8da4a1b5 100644
> --- a/fs/btrfs/discard.h
> +++ b/fs/btrfs/discard.h
> @@ -18,6 +18,8 @@
>
>  /* discard size limits */
>  #define BTRFS_DISCARD_MAX_SIZE		(SZ_64M)
> +#define BTRFS_DISCARD_MAX_FILTER	(SZ_1M)
> +#define BTRFS_DISCARD_MIN_FILTER	(SZ_32K)
>
>  /* discard flags */
>  #define BTRFS_DISCARD_RESET_CURSOR	(1UL << 0)
> @@ -39,6 +41,8 @@ void btrfs_add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
> 			       struct btrfs_block_group_cache *cache);
>  void btrfs_add_to_discard_free_list(struct btrfs_discard_ctl *discard_ctl,
> 				    struct btrfs_block_group_cache *cache);
> +void btrfs_discard_check_filter(struct btrfs_block_group_cache *cache,
> +				u64 bytes);
>  void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
>
>  void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index ce33803a45b2..ed35dc090df6 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -2471,6 +2471,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
> 	if (ret)
> 		kmem_cache_free(btrfs_free_space_cachep, info);
>  out:
> +	btrfs_discard_check_filter(cache, bytes);

So we're only accounting the new space? What if we merge with a larger
area here? We should probably make our decision based on the actual
trimmable area.
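
Something like the below is what I have in mind. Completely untested,
and 'filter_bytes' is just a name I made up for illustration, it's not
in this series. The idea is to sample the entry's size after
try_merge_free_space() has had a chance to grow it, and feed that to
the filter instead of only the newly freed bytes:

	/* in __btrfs_add_free_space(), with u64 filter_bytes = bytes; */

	/*
	 * try_merge_free_space() may have merged 'info' with its
	 * neighbors, so the entry can be much larger than the bytes we
	 * were asked to add.  Sample it before link_free_space() can
	 * fail and free 'info'.
	 */
	filter_bytes = max(filter_bytes, info->bytes);

	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	/* judge the merged extent, not just the new space */
	btrfs_discard_check_filter(cache, filter_bytes);
	btrfs_discard_update_discardable(cache, ctl);

That way a block group gets promoted when a small free merges into a
region that crosses BTRFS_DISCARD_MAX_FILTER, not only when a single
free is that large.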

> 	btrfs_discard_update_discardable(cache, ctl);
> 	spin_unlock(&ctl->tree_lock);
>
> @@ -3409,7 +3410,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
> 			goto next;
> 		}
> 		unlink_free_space(ctl, entry);
> -		if (bytes > BTRFS_DISCARD_MAX_SIZE) {
> +		/*
> +		 * Let bytes = BTRFS_DISCARD_MAX_SIZE + X.
> +		 * If X < BTRFS_DISCARD_MIN_FILTER, we won't trim X when
> +		 * we come back around. So trim it now.
> +		 */
> +		if (bytes > (BTRFS_DISCARD_MAX_SIZE +
> +			     BTRFS_DISCARD_MIN_FILTER)) {
> 			bytes = extent_bytes = BTRFS_DISCARD_MAX_SIZE;
> 			entry->offset += BTRFS_DISCARD_MAX_SIZE;
> 			entry->bytes -= BTRFS_DISCARD_MAX_SIZE;
> @@ -3510,7 +3517,7 @@ static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
>
>  static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
> 			u64 *total_trimmed, u64 start, u64 end, u64 minlen,
> -			bool async)
> +			u64 maxlen, bool async)
>  {
> 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> 	struct btrfs_free_space *entry;
> @@ -3535,7 +3542,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
> 	}
>
> 	entry = tree_search_offset(ctl, offset, 1, 0);
> -	if (!entry || (async && start == offset &&
> +	if (!entry || (async && minlen && start == offset &&
> 		       btrfs_free_space_trimmed(entry))) {

Huh? Why do we care if minlen is set if our entry is already trimmed?
If we're already trimmed we should just skip it even with minlen set,
right?
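
I'd have expected the skip to not depend on minlen at all. Untested,
but this is the check I have in mind:

	entry = tree_search_offset(ctl, offset, 1, 0);
	/*
	 * An already trimmed entry gets skipped during async discard
	 * whether or not a minlen filter is in effect.
	 */
	if (!entry || (async && start == offset &&
		       btrfs_free_space_trimmed(entry))) {

Thanks,
Josef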