On Mon, Oct 07, 2019 at 04:17:44PM -0400, Dennis Zhou wrote:
> Non-block group destruction discarding currently only had a single list
> with no minimum discard length. This can lead to caravanning more
> meaningful discards behind a heavily fragmented block group.
> 
> This adds support for multiple lists with minimum discard lengths to
> prevent the caravan effect. We promote block groups back up when we
> exceed the BTRFS_DISCARD_MAX_FILTER size, currently we support only 2
> lists with filters of 1MB and 32KB respectively.
> 
> Signed-off-by: Dennis Zhou <den...@kernel.org>
> ---
>  fs/btrfs/ctree.h            |  2 +-
>  fs/btrfs/discard.c          | 60 +++++++++++++++++++++++++++++++++----
>  fs/btrfs/discard.h          |  4 +++
>  fs/btrfs/free-space-cache.c | 37 +++++++++++++++--------
>  fs/btrfs/free-space-cache.h |  2 +-
>  5 files changed, 85 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index e81f699347e0..b5608f8dc41a 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -439,7 +439,7 @@ struct btrfs_full_stripe_locks_tree {
>  };
>  
>  /* discard control */
> -#define BTRFS_NR_DISCARD_LISTS               2
> +#define BTRFS_NR_DISCARD_LISTS               3
>  
>  struct btrfs_discard_ctl {
>       struct workqueue_struct *discard_workers;
> diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
> index 072c73f48297..296cbffc5957 100644
> --- a/fs/btrfs/discard.c
> +++ b/fs/btrfs/discard.c
> @@ -20,6 +20,10 @@
>  #define BTRFS_DISCARD_MAX_DELAY              (10000UL)
>  #define BTRFS_DISCARD_MAX_IOPS               (10UL)
>  
> +/* monotonically decreasing filters after 0 */
> +static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {0,
> +     BTRFS_DISCARD_MAX_FILTER, BTRFS_DISCARD_MIN_FILTER};
> +
>  static struct list_head *
>  btrfs_get_discard_list(struct btrfs_discard_ctl *discard_ctl,
>                      struct btrfs_block_group_cache *cache)
> @@ -120,7 +124,7 @@ find_next_cache(struct btrfs_discard_ctl *discard_ctl, 
> u64 now)
>  }
>  
>  static struct btrfs_block_group_cache *
> -peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
> +peek_discard_list(struct btrfs_discard_ctl *discard_ctl, int *discard_index)
>  {
>       struct btrfs_block_group_cache *cache;
>       u64 now = ktime_get_ns();
> @@ -132,6 +136,7 @@ peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
>  
>       if (cache && now > cache->discard_delay) {
>               discard_ctl->cache = cache;
> +             *discard_index = cache->discard_index;
>               if (cache->discard_index == 0 &&
>                   cache->free_space_ctl->free_space != cache->key.offset) {
>                       __btrfs_add_to_discard_list(discard_ctl, cache);
> @@ -150,6 +155,36 @@ peek_discard_list(struct btrfs_discard_ctl *discard_ctl)
>       return cache;
>  }
>  
> +void btrfs_discard_check_filter(struct btrfs_block_group_cache *cache,
> +                             u64 bytes)
> +{
> +     struct btrfs_discard_ctl *discard_ctl;
> +
> +     if (!cache || !btrfs_test_opt(cache->fs_info, DISCARD_ASYNC))
> +             return;
> +
> +     discard_ctl = &cache->fs_info->discard_ctl;
> +
> +     if (cache && cache->discard_index > 1 &&
> +         bytes >= BTRFS_DISCARD_MAX_FILTER) {
> +             remove_from_discard_list(discard_ctl, cache);
> +             cache->discard_index = 1;

Really need names here, I have no idea what 1 is.

> +             btrfs_add_to_discard_list(discard_ctl, cache);
> +     }
> +}
> +
> +static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
> +                                    struct btrfs_block_group_cache *cache)
> +{
> +     cache->discard_index++;
> +     if (cache->discard_index == BTRFS_NR_DISCARD_LISTS) {
> +             cache->discard_index = 1;
> +             return;
> +     }
> +
> +     btrfs_add_to_discard_list(discard_ctl, cache);
> +}
> +
>  void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
>                              struct btrfs_block_group_cache *cache)
>  {
> @@ -202,23 +237,34 @@ static void btrfs_discard_workfn(struct work_struct 
> *work)
>  {
>       struct btrfs_discard_ctl *discard_ctl;
>       struct btrfs_block_group_cache *cache;
> +     int discard_index = 0;
>       u64 trimmed = 0;
> +     u64 minlen = 0;
>  
>       discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
>  
>  again:
> -     cache = peek_discard_list(discard_ctl);
> +     cache = peek_discard_list(discard_ctl, &discard_index);
>       if (!cache || !btrfs_run_discard_work(discard_ctl))
>               return;
>  
> -     if (btrfs_discard_bitmaps(cache))
> +     minlen = discard_minlen[discard_index];
> +
> +     if (btrfs_discard_bitmaps(cache)) {
> +             u64 maxlen = 0;
> +
> +             if (discard_index)
> +                     maxlen = discard_minlen[discard_index - 1];
> +
>               btrfs_trim_block_group_bitmaps(cache, &trimmed,
>                                              cache->discard_cursor,
>                                              btrfs_block_group_end(cache),
> -                                            0, true);
> -     else
> +                                            minlen, maxlen, true);
> +     } else {
>               btrfs_trim_block_group(cache, &trimmed, cache->discard_cursor,
> -                                    btrfs_block_group_end(cache), 0, true);
> +                                    btrfs_block_group_end(cache),
> +                                    minlen, true);
> +     }
>  
>       discard_ctl->prev_discard = trimmed;
>  
> @@ -231,6 +277,8 @@ static void btrfs_discard_workfn(struct work_struct *work)
>                                cache->key.offset)
>                               btrfs_add_to_discard_free_list(discard_ctl,
>                                                              cache);
> +                     else
> +                             btrfs_update_discard_index(discard_ctl, cache);
>               } else {
>                       cache->discard_cursor = cache->key.objectid;
>                       cache->discard_flags |= BTRFS_DISCARD_BITMAPS;
> diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h
> index 898dd92dbf8f..1daa8da4a1b5 100644
> --- a/fs/btrfs/discard.h
> +++ b/fs/btrfs/discard.h
> @@ -18,6 +18,8 @@
>  
>  /* discard size limits */
>  #define BTRFS_DISCARD_MAX_SIZE               (SZ_64M)
> +#define BTRFS_DISCARD_MAX_FILTER     (SZ_1M)
> +#define BTRFS_DISCARD_MIN_FILTER     (SZ_32K)
>  
>  /* discard flags */
>  #define BTRFS_DISCARD_RESET_CURSOR   (1UL << 0)
> @@ -39,6 +41,8 @@ void btrfs_add_to_discard_list(struct btrfs_discard_ctl 
> *discard_ctl,
>                              struct btrfs_block_group_cache *cache);
>  void btrfs_add_to_discard_free_list(struct btrfs_discard_ctl *discard_ctl,
>                                   struct btrfs_block_group_cache *cache);
> +void btrfs_discard_check_filter(struct btrfs_block_group_cache *cache,
> +                             u64 bytes);
>  void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
>  
>  void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index ce33803a45b2..ed35dc090df6 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -2471,6 +2471,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info 
> *fs_info,
>       if (ret)
>               kmem_cache_free(btrfs_free_space_cachep, info);
>  out:
> +     btrfs_discard_check_filter(cache, bytes);

So we're only accounting the new space?  What if we merge with a larger area
here?  We should probably make our decision based on the actual trimmable area.

>       btrfs_discard_update_discardable(cache, ctl);
>       spin_unlock(&ctl->tree_lock);
>  
> @@ -3409,7 +3410,13 @@ static int trim_no_bitmap(struct 
> btrfs_block_group_cache *block_group,
>                               goto next;
>                       }
>                       unlink_free_space(ctl, entry);
> -                     if (bytes > BTRFS_DISCARD_MAX_SIZE) {
> +                     /*
> +                      * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
> +                      * If X < BTRFS_DISCARD_MIN_FILTER, we won't trim X when
> +                      * we come back around.  So trim it now.
> +                      */
> +                     if (bytes > (BTRFS_DISCARD_MAX_SIZE +
> +                                  BTRFS_DISCARD_MIN_FILTER)) {
>                               bytes = extent_bytes = BTRFS_DISCARD_MAX_SIZE;
>                               entry->offset += BTRFS_DISCARD_MAX_SIZE;
>                               entry->bytes -= BTRFS_DISCARD_MAX_SIZE;
> @@ -3510,7 +3517,7 @@ static void end_trimming_bitmap(struct 
> btrfs_free_space_ctl *ctl,
>  
>  static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
>                       u64 *total_trimmed, u64 start, u64 end, u64 minlen,
> -                     bool async)
> +                     u64 maxlen, bool async)
>  {
>       struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
>       struct btrfs_free_space *entry;
> @@ -3535,7 +3542,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache 
> *block_group,
>               }
>  
>               entry = tree_search_offset(ctl, offset, 1, 0);
> -             if (!entry || (async && start == offset &&
> +             if (!entry || (async && minlen && start == offset &&
>                              btrfs_free_space_trimmed(entry))) {

Huh?  Why do we care if minlen is set if our entry is already trimmed?  If we're
already trimmed we should just skip it even with minlen set, right?  Thanks,

Josef

Reply via email to