On 18.08.2017 00:23, jo...@toxicpanda.com wrote:
> From: Josef Bacik <jba...@fb.com>
> 
> The way we handle delalloc metadata reservations has gotten
> progressively more complicated over the years.  There is so much cruft
> and weirdness around keeping the reserved count and outstanding counters
> consistent and handling the error cases that it's impossible to
> understand.
> 
> Fix this by making the delalloc block rsv per-inode.  This way we can
> calculate the actual size of the outstanding metadata reservations every
> time we make a change, and then reserve the delta based on that amount.
> This greatly simplifies the code everywhere, and makes the error
> handling in btrfs_delalloc_reserve_metadata far less terrifying.
> 
> Signed-off-by: Josef Bacik <jba...@fb.com>
> ---
>  fs/btrfs/btrfs_inode.h       |  29 ++--
>  fs/btrfs/ctree.h             |   5 +-
>  fs/btrfs/delayed-inode.c     |  46 +-----
>  fs/btrfs/disk-io.c           |  18 ++-
>  fs/btrfs/extent-tree.c       | 330 
> +++++++++++++++----------------------------
>  fs/btrfs/inode.c             |  18 ++-
>  include/trace/events/btrfs.h |  22 ---
>  7 files changed, 147 insertions(+), 321 deletions(-)
> 
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index 2894ad6..71a4ded 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -36,14 +36,13 @@
>  #define BTRFS_INODE_ORPHAN_META_RESERVED     1
>  #define BTRFS_INODE_DUMMY                    2
>  #define BTRFS_INODE_IN_DEFRAG                        3
> -#define BTRFS_INODE_DELALLOC_META_RESERVED   4
> -#define BTRFS_INODE_HAS_ORPHAN_ITEM          5
> -#define BTRFS_INODE_HAS_ASYNC_EXTENT         6
> -#define BTRFS_INODE_NEEDS_FULL_SYNC          7
> -#define BTRFS_INODE_COPY_EVERYTHING          8
> -#define BTRFS_INODE_IN_DELALLOC_LIST         9
> -#define BTRFS_INODE_READDIO_NEED_LOCK                10
> -#define BTRFS_INODE_HAS_PROPS                        11
> +#define BTRFS_INODE_HAS_ORPHAN_ITEM          4
> +#define BTRFS_INODE_HAS_ASYNC_EXTENT         5
> +#define BTRFS_INODE_NEEDS_FULL_SYNC          6
> +#define BTRFS_INODE_COPY_EVERYTHING          7
> +#define BTRFS_INODE_IN_DELALLOC_LIST         8
> +#define BTRFS_INODE_READDIO_NEED_LOCK                9
> +#define BTRFS_INODE_HAS_PROPS                        10
>  
>  /* in memory btrfs inode */
>  struct btrfs_inode {
> @@ -176,7 +175,8 @@ struct btrfs_inode {
>        * of extent items we've reserved metadata for.
>        */
>       unsigned outstanding_extents;
> -     unsigned reserved_extents;
> +
> +     struct btrfs_block_rsv block_rsv;
>  
>       /*
>        * Cached values if inode properties
> @@ -278,17 +278,6 @@ static inline void btrfs_mod_outstanding_extents(struct 
> btrfs_inode *inode,
>                                                 mod);
>  }
>  
> -static inline void btrfs_mod_reserved_extents(struct btrfs_inode *inode,
> -                                           int mod)
> -{
> -     ASSERT(spin_is_locked(&inode->lock));
> -     inode->reserved_extents += mod;
> -     if (btrfs_is_free_space_inode(inode))
> -             return;
> -     trace_btrfs_inode_mod_reserved_extents(inode->root, btrfs_ino(inode),
> -                                            mod);
> -}
> -
>  static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 
> generation)
>  {
>       int ret = 0;
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index b43114d..a4ee115 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -763,8 +763,6 @@ struct btrfs_fs_info {
>        * delayed dir index item
>        */
>       struct btrfs_block_rsv global_block_rsv;
> -     /* block reservation for delay allocation */
> -     struct btrfs_block_rsv delalloc_block_rsv;
>       /* block reservation for metadata operations */
>       struct btrfs_block_rsv trans_block_rsv;
>       /* block reservation for chunk tree */
> @@ -2744,6 +2742,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
>  void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
>  struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
>                                             unsigned short type);
> +void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
> +                                struct btrfs_block_rsv *rsv,
> +                                unsigned short type);
>  void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
>                         struct btrfs_block_rsv *rsv);
>  void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
> index 19e4ad2..5d73f79 100644
> --- a/fs/btrfs/delayed-inode.c
> +++ b/fs/btrfs/delayed-inode.c
> @@ -581,7 +581,6 @@ static int btrfs_delayed_inode_reserve_metadata(
>       struct btrfs_block_rsv *dst_rsv;
>       u64 num_bytes;
>       int ret;
> -     bool release = false;
>  
>       src_rsv = trans->block_rsv;
>       dst_rsv = &fs_info->delayed_block_rsv;
> @@ -589,36 +588,13 @@ static int btrfs_delayed_inode_reserve_metadata(
>       num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
>  
>       /*
> -      * If our block_rsv is the delalloc block reserve then check and see if
> -      * we have our extra reservation for updating the inode.  If not fall
> -      * through and try to reserve space quickly.
> -      *
> -      * We used to try and steal from the delalloc block rsv or the global
> -      * reserve, but we'd steal a full reservation, which isn't kind.  We are
> -      * here through delalloc which means we've likely just cowed down close
> -      * to the leaf that contains the inode, so we would steal less just
> -      * doing the fallback inode update, so if we do end up having to steal
> -      * from the global block rsv we hopefully only steal one or two blocks
> -      * worth which is less likely to hurt us.
> -      */
> -     if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
> -             spin_lock(&inode->lock);
> -             if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
> -                                    &inode->runtime_flags))
> -                     release = true;
> -             else
> -                     src_rsv = NULL;
> -             spin_unlock(&inode->lock);
> -     }
> -
> -     /*
>        * btrfs_dirty_inode will update the inode under btrfs_join_transaction
>        * which doesn't reserve space for speed.  This is a problem since we
>        * still need to reserve space for this update, so try to reserve the
>        * space.
>        *
>        * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
> -      * we're accounted for.
> +      * we always reserve enough to update the inode item.
>        */
>       if (!src_rsv || (!trans->bytes_reserved &&
>                        src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
> @@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
>       }
>  
>       ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
> -
> -     /*
> -      * Migrate only takes a reservation, it doesn't touch the size of the
> -      * block_rsv.  This is to simplify people who don't normally have things
> -      * migrated from their block rsv.  If they go to release their
> -      * reservation, that will decrease the size as well, so if migrate
> -      * reduced size we'd end up with a negative size.  But for the
> -      * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
> -      * but we could in fact do this reserve/migrate dance several times
> -      * between the time we did the original reservation and we'd clean it
> -      * up.  So to take care of this, release the space for the meta
> -      * reservation here.  I think it may be time for a documentation page on
> -      * how block rsvs. work.
> -      */
>       if (!ret) {
>               trace_btrfs_space_reservation(fs_info, "delayed_inode",
>                                             btrfs_ino(inode), num_bytes, 1);
>               node->bytes_reserved = num_bytes;
>       }
>  
> -     if (release) {
> -             trace_btrfs_space_reservation(fs_info, "delalloc",
> -                                           btrfs_ino(inode), num_bytes, 0);
> -             btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
> -     }
> -
>       return ret;
>  }
>  
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 9283e28..5681f8f 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2607,14 +2607,6 @@ int open_ctree(struct super_block *sb,
>               goto fail_delalloc_bytes;
>       }
>  
> -     fs_info->btree_inode = new_inode(sb);
> -     if (!fs_info->btree_inode) {
> -             err = -ENOMEM;
> -             goto fail_bio_counter;
> -     }
> -
> -     mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
> -
>       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
>       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
>       INIT_LIST_HEAD(&fs_info->trans_list);
> @@ -2647,8 +2639,6 @@ int open_ctree(struct super_block *sb,
>       btrfs_mapping_init(&fs_info->mapping_tree);
>       btrfs_init_block_rsv(&fs_info->global_block_rsv,
>                            BTRFS_BLOCK_RSV_GLOBAL);
> -     btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
> -                          BTRFS_BLOCK_RSV_DELALLOC);
>       btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
>       btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
>       btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
> @@ -2679,6 +2669,14 @@ int open_ctree(struct super_block *sb,
>  
>       INIT_LIST_HEAD(&fs_info->ordered_roots);
>       spin_lock_init(&fs_info->ordered_root_lock);
> +
> +     fs_info->btree_inode = new_inode(sb);
> +     if (!fs_info->btree_inode) {
> +             err = -ENOMEM;
> +             goto fail_bio_counter;
> +     }
> +     mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
> +
>       fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
>                                       GFP_KERNEL);
>       if (!fs_info->delayed_root) {

This hunk seems to be a leftover from some other work, so you can drop it
for v2. Of course, the removal of the call to btrfs_init_block_rsv
should stay.

> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index a313487..8484e0f 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4761,7 +4761,6 @@ static inline u64 calc_reclaim_items_nr(struct 
> btrfs_fs_info *fs_info,
>  static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
>                           u64 orig, bool wait_ordered)
>  {
> -     struct btrfs_block_rsv *block_rsv;
>       struct btrfs_space_info *space_info;
>       struct btrfs_trans_handle *trans;
>       u64 delalloc_bytes;
> @@ -4777,8 +4776,7 @@ static void shrink_delalloc(struct btrfs_fs_info 
> *fs_info, u64 to_reclaim,
>       to_reclaim = items * EXTENT_SIZE_PER_ITEM;
>  
>       trans = (struct btrfs_trans_handle *)current->journal_info;
> -     block_rsv = &fs_info->delalloc_block_rsv;
> -     space_info = block_rsv->space_info;
> +     space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
>  
>       delalloc_bytes = percpu_counter_sum_positive(
>                                               &fs_info->delalloc_bytes);
> @@ -5502,7 +5500,8 @@ static void space_info_add_new_bytes(struct 
> btrfs_fs_info *fs_info,
>  
>  static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
>                                   struct btrfs_block_rsv *block_rsv,
> -                                 struct btrfs_block_rsv *dest, u64 num_bytes)
> +                                 struct btrfs_block_rsv *dest, u64 num_bytes,
> +                                 u64 *released_bytes)
>  {

Instead of adding yet another parameter, why not just return the value?
That way callers can handle/check it of their own accord. It seems this
is only ever used for debugging (tracepoint) purposes anyway.

>       struct btrfs_space_info *space_info = block_rsv->space_info;
>  
> @@ -5520,6 +5519,8 @@ static void block_rsv_release_bytes(struct 
> btrfs_fs_info *fs_info,
>       spin_unlock(&block_rsv->lock);
>  
>       if (num_bytes > 0) {
> +             if (released_bytes)
> +                     *released_bytes = num_bytes;
>               if (dest) {
>                       spin_lock(&dest->lock);
>                       if (!dest->full) {
> @@ -5561,6 +5562,15 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, 
> unsigned short type)
>       rsv->type = type;
>  }
>  
> +void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
> +                                struct btrfs_block_rsv *rsv,
> +                                unsigned short type)
> +{
> +     btrfs_init_block_rsv(rsv, type);
> +     rsv->space_info = __find_space_info(fs_info,
> +                                         BTRFS_BLOCK_GROUP_METADATA);
> +}
> +
>  struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
>                                             unsigned short type)
>  {
> @@ -5570,9 +5580,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct 
> btrfs_fs_info *fs_info,
>       if (!block_rsv)
>               return NULL;
>  
> -     btrfs_init_block_rsv(block_rsv, type);
> -     block_rsv->space_info = __find_space_info(fs_info,
> -                                               BTRFS_BLOCK_GROUP_METADATA);
> +     btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
>       return block_rsv;
>  }
>  
> @@ -5655,6 +5663,68 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
>       return ret;
>  }
>  
> +/**
> + * btrfs_inode_rsv_refill - refill the inode block rsv.
> + * @inode - the inode we are refilling.
> + * @flush - the flusing restriction.
> + *
> + * Essentially the same as btrfs_block_rsv_refill, except it uses the
> + * block_rsv->size as the minimum size.  We'll either refill the missing 
> amount
> + * or return if we already have enough space.  This will also handle the 
> resreve
> + * tracepoint for the reserved amount.
> + */
> +int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
> +                        enum btrfs_reserve_flush_enum flush)
> +{
> +     struct btrfs_root *root = inode->root;
> +     struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
> +     u64 num_bytes = 0;
> +     int ret = -ENOSPC;
> +
> +     spin_lock(&block_rsv->lock);
> +     if (block_rsv->reserved < block_rsv->size)
> +             num_bytes = block_rsv->size - block_rsv->reserved;
> +     spin_unlock(&block_rsv->lock);
> +
> +     if (num_bytes == 0)
> +             return 0;
> +
> +     ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
> +     if (!ret) {
> +             block_rsv_add_bytes(block_rsv, num_bytes, 0);
> +             trace_btrfs_space_reservation(root->fs_info, "delalloc",
> +                                           btrfs_ino(inode), num_bytes, 1);
> +     }
> +     return ret;
> +}
> +
> +/**
> + * btrfs_inode_rsv_release - release any excessive reservation.
> + * @inode - the inode we need to release from.
> + *
> + * This is the same as btrfs_block_rsv_release, except that it handles the
> + * tracepoint for the reservation.
> + */
> +void btrfs_inode_rsv_release(struct btrfs_inode *inode)
> +{
> +     struct btrfs_fs_info *fs_info = inode->root->fs_info;
> +     struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
> +     struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
> +     u64 release_bytes = 0;
> +
> +     /*
> +      * Since we statically set the block_rsv->size we just want to say we
> +      * are releasing 0 bytes, and then we'll just get the reservation over
> +      * the size free'd.
> +      */
> +     block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
> +                             &release_bytes);
> +     if (release_bytes > 0)
> +             trace_btrfs_space_reservation(fs_info, "delalloc",
> +                                           btrfs_ino(inode), release_bytes,
> +                                           0);
> +}
> +
>  void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
>                            struct btrfs_block_rsv *block_rsv,
>                            u64 num_bytes)
> @@ -5664,7 +5734,8 @@ void btrfs_block_rsv_release(struct btrfs_fs_info 
> *fs_info,
>       if (global_rsv == block_rsv ||
>           block_rsv->space_info != global_rsv->space_info)
>               global_rsv = NULL;
> -     block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
> +     block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes,
> +                             NULL);
>  }
>  
>  static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
> @@ -5726,7 +5797,6 @@ static void init_global_block_rsv(struct btrfs_fs_info 
> *fs_info)
>  
>       space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
>       fs_info->global_block_rsv.space_info = space_info;
> -     fs_info->delalloc_block_rsv.space_info = space_info;
>       fs_info->trans_block_rsv.space_info = space_info;
>       fs_info->empty_block_rsv.space_info = space_info;
>       fs_info->delayed_block_rsv.space_info = space_info;
> @@ -5745,9 +5815,7 @@ static void init_global_block_rsv(struct btrfs_fs_info 
> *fs_info)
>  static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
>  {
>       block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
> -                             (u64)-1);
> -     WARN_ON(fs_info->delalloc_block_rsv.size > 0);
> -     WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
> +                             (u64)-1, NULL);
>       WARN_ON(fs_info->trans_block_rsv.size > 0);
>       WARN_ON(fs_info->trans_block_rsv.reserved > 0);
>       WARN_ON(fs_info->chunk_block_rsv.size > 0);
> @@ -5786,7 +5854,7 @@ void btrfs_trans_release_chunk_metadata(struct 
> btrfs_trans_handle *trans)
>       WARN_ON_ONCE(!list_empty(&trans->new_bgs));
>  
>       block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
> -                             trans->chunk_bytes_reserved);
> +                             trans->chunk_bytes_reserved, NULL);
>       trans->chunk_bytes_reserved = 0;
>  }
>  
> @@ -5886,95 +5954,37 @@ void btrfs_subvolume_release_metadata(struct 
> btrfs_fs_info *fs_info,
>       btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
>  }
>  
> -/**
> - * drop_over_reserved_extents - drop our extra extent reservations
> - * @inode: the inode we're dropping the extent for
> - *
> - * We reserve extents we may use, but they may have been merged with other
> - * extents and we may not need the extra reservation.
> - *
> - * We also call this when we've completed io to an extent or had an error and
> - * cleared the outstanding extent, in either case we no longer need our
> - * reservation and can drop the excess.
> - */
> -static unsigned drop_over_reserved_extents(struct btrfs_inode *inode)
> -{
> -     unsigned num_extents = 0;
> -
> -     if (inode->reserved_extents > inode->outstanding_extents) {
> -             num_extents = inode->reserved_extents -
> -                     inode->outstanding_extents;
> -             btrfs_mod_reserved_extents(inode, -num_extents);
> -     }
> -
> -     if (inode->outstanding_extents == 0 &&
> -         test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
> -                            &inode->runtime_flags))
> -             num_extents++;
> -     return num_extents;
> -}
> -
> -/**
> - * calc_csum_metadata_size - return the amount of metadata space that must be
> - *   reserved/freed for the given bytes.
> - * @inode: the inode we're manipulating
> - * @num_bytes: the number of bytes in question
> - * @reserve: 1 if we are reserving space, 0 if we are freeing space
> - *
> - * This adjusts the number of csum_bytes in the inode and then returns the
> - * correct amount of metadata that must either be reserved or freed.  We
> - * calculate how many checksums we can fit into one leaf and then divide the
> - * number of bytes that will need to be checksumed by this value to figure 
> out
> - * how many checksums will be required.  If we are adding bytes then the 
> number
> - * may go up and we will return the number of additional bytes that must be
> - * reserved.  If it is going down we will return the number of bytes that 
> must
> - * be freed.
> - *
> - * This must be called with BTRFS_I(inode)->lock held.
> - */
> -static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes,
> -                                int reserve)
> +static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info 
> *fs_info,
> +                                              struct btrfs_inode *inode)
>  {
> -     struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
> -     u64 old_csums, num_csums;
> +     struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
> +     u64 reserve_size = 0;
> +     u64 csum_leaves;
> +     unsigned outstanding_extents;
>  
> -     if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0)
> -             return 0;
> +     ASSERT(spin_is_locked(&inode->lock));

How about using lockdep_is_held() so that this is caught even if btrfs
is compiled without assertions enabled?

> +     outstanding_extents = inode->outstanding_extents;
> +     if (outstanding_extents)
> +             reserve_size = btrfs_calc_trans_metadata_size(fs_info,
> +                                             outstanding_extents + 1);
> +     csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
> +                                              inode->csum_bytes);
> +     reserve_size += btrfs_calc_trans_metadata_size(fs_info,
> +                                                    csum_leaves);
>  
> -     old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
> -     if (reserve)
> -             inode->csum_bytes += num_bytes;
> -     else
> -             inode->csum_bytes -= num_bytes;
> -     num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes);
> -
> -     /* No change, no need to reserve more */
> -     if (old_csums == num_csums)
> -             return 0;
> -
> -     if (reserve)
> -             return btrfs_calc_trans_metadata_size(fs_info,
> -                                                   num_csums - old_csums);
> -
> -     return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums);
> +     spin_lock(&block_rsv->lock);
> +     block_rsv->size = reserve_size;
> +     spin_unlock(&block_rsv->lock);
>  }
>  
>  int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
>  {
>       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
>       struct btrfs_root *root = inode->root;
> -     struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv;
> -     u64 to_reserve = 0;
> -     u64 csum_bytes;
> -     unsigned nr_extents, reserve_extents;
> +     unsigned nr_extents;
>       enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
>       int ret = 0;
>       bool delalloc_lock = true;
> -     u64 to_free = 0;
> -     unsigned dropped;
> -     bool release_extra = false;
> -     bool underflow = false;
> -     bool did_retry = false;
>  
>       WARN_ON(btrfs_is_free_space_inode(inode));
>  
> @@ -6001,33 +6011,13 @@ int btrfs_delalloc_reserve_metadata(struct 
> btrfs_inode *inode, u64 num_bytes)
>               mutex_lock(&inode->delalloc_mutex);
>  
>       num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
> -retry:
> +
> +     /* Add our new extents and calculate the new rsv size. */
>       spin_lock(&inode->lock);
> -     reserve_extents = nr_extents = count_max_extents(num_bytes);
> +     nr_extents = count_max_extents(num_bytes);
>       btrfs_mod_outstanding_extents(inode, nr_extents);
> -
> -     /*
> -      * Because we add an outstanding extent for ordered before we clear
> -      * delalloc we will double count our outstanding extents slightly.  This
> -      * could mean that we transiently over-reserve, which could result in an
> -      * early ENOSPC if our timing is unlucky.  Keep track of the case that
> -      * we had a reservation underflow so we can retry if we fail.
> -      *
> -      * Keep in mind we can legitimately have more outstanding extents than
> -      * reserved because of fragmentation, so only allow a retry once.
> -      */
> -     if (inode->outstanding_extents >
> -         inode->reserved_extents + nr_extents) {
> -             reserve_extents = inode->outstanding_extents -
> -                     inode->reserved_extents;
> -             underflow = true;
> -     }
> -
> -     /* We always want to reserve a slot for updating the inode. */
> -     to_reserve = btrfs_calc_trans_metadata_size(fs_info,
> -                                                 reserve_extents + 1);
> -     to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
> -     csum_bytes = inode->csum_bytes;
> +     inode->csum_bytes += num_bytes;
> +     btrfs_calculate_inode_block_rsv_size(fs_info, inode);
>       spin_unlock(&inode->lock);
>  
>       if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
> @@ -6037,100 +6027,26 @@ int btrfs_delalloc_reserve_metadata(struct 
> btrfs_inode *inode, u64 num_bytes)
>                       goto out_fail;
>       }
>  
> -     ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
> +     ret = btrfs_inode_rsv_refill(inode, flush);
>       if (unlikely(ret)) {
>               btrfs_qgroup_free_meta(root,
>                                      nr_extents * fs_info->nodesize);
>               goto out_fail;
>       }
>  
> -     spin_lock(&inode->lock);
> -     if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
> -                          &inode->runtime_flags)) {
> -             to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1);
> -             release_extra = true;
> -     }
> -     btrfs_mod_reserved_extents(inode, reserve_extents);
> -     spin_unlock(&inode->lock);
> -
>       if (delalloc_lock)
>               mutex_unlock(&inode->delalloc_mutex);
> -
> -     if (to_reserve)
> -             trace_btrfs_space_reservation(fs_info, "delalloc",
> -                                           btrfs_ino(inode), to_reserve, 1);
> -     if (release_extra)
> -             btrfs_block_rsv_release(fs_info, block_rsv,
> -                             btrfs_calc_trans_metadata_size(fs_info, 1));
>       return 0;
>  
>  out_fail:
>       spin_lock(&inode->lock);
>       nr_extents = count_max_extents(num_bytes);
>       btrfs_mod_outstanding_extents(inode, -nr_extents);
> -
> -     dropped = drop_over_reserved_extents(inode);
> -     /*
> -      * If the inodes csum_bytes is the same as the original
> -      * csum_bytes then we know we haven't raced with any free()ers
> -      * so we can just reduce our inodes csum bytes and carry on.
> -      */
> -     if (inode->csum_bytes == csum_bytes) {
> -             calc_csum_metadata_size(inode, num_bytes, 0);
> -     } else {
> -             u64 orig_csum_bytes = inode->csum_bytes;
> -             u64 bytes;
> -
> -             /*
> -              * This is tricky, but first we need to figure out how much we
> -              * freed from any free-ers that occurred during this
> -              * reservation, so we reset ->csum_bytes to the csum_bytes
> -              * before we dropped our lock, and then call the free for the
> -              * number of bytes that were freed while we were trying our
> -              * reservation.
> -              */
> -             bytes = csum_bytes - inode->csum_bytes;
> -             inode->csum_bytes = csum_bytes;
> -             to_free = calc_csum_metadata_size(inode, bytes, 0);
> -
> -
> -             /*
> -              * Now we need to see how much we would have freed had we not
> -              * been making this reservation and our ->csum_bytes were not
> -              * artificially inflated.
> -              */
> -             inode->csum_bytes = csum_bytes - num_bytes;
> -             bytes = csum_bytes - orig_csum_bytes;
> -             bytes = calc_csum_metadata_size(inode, bytes, 0);
> -
> -             /*
> -              * Now reset ->csum_bytes to what it should be.  If bytes is
> -              * more than to_free then we would have freed more space had we
> -              * not had an artificially high ->csum_bytes, so we need to free
> -              * the remainder.  If bytes is the same or less then we don't
> -              * need to do anything, the other free-ers did the correct
> -              * thing.
> -              */
> -             inode->csum_bytes = orig_csum_bytes - num_bytes;
> -             if (bytes > to_free)
> -                     to_free = bytes - to_free;
> -             else
> -                     to_free = 0;
> -     }
> +     inode->csum_bytes -= num_bytes;
> +     btrfs_calculate_inode_block_rsv_size(fs_info, inode);
>       spin_unlock(&inode->lock);
> -     if (dropped)
> -             to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
>  
> -     if (to_free) {
> -             btrfs_block_rsv_release(fs_info, block_rsv, to_free);
> -             trace_btrfs_space_reservation(fs_info, "delalloc",
> -                                           btrfs_ino(inode), to_free, 0);
> -     }
> -     if (underflow && !did_retry) {
> -             did_retry = true;
> -             underflow = false;
> -             goto retry;
> -     }
> +     btrfs_inode_rsv_release(inode);
>       if (delalloc_lock)
>               mutex_unlock(&inode->delalloc_mutex);
>       return ret;
> @@ -6148,25 +6064,17 @@ int btrfs_delalloc_reserve_metadata(struct 
> btrfs_inode *inode, u64 num_bytes)
>  void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 
> num_bytes)
>  {
>       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
> -     u64 to_free = 0;
> -     unsigned dropped;
>  
>       num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
>       spin_lock(&inode->lock);
> -     dropped = drop_over_reserved_extents(inode);
> -     if (num_bytes)
> -             to_free = calc_csum_metadata_size(inode, num_bytes, 0);
> +     inode->csum_bytes -= num_bytes;
> +     btrfs_calculate_inode_block_rsv_size(fs_info, inode);
>       spin_unlock(&inode->lock);
> -     if (dropped > 0)
> -             to_free += btrfs_calc_trans_metadata_size(fs_info, dropped);
>  
>       if (btrfs_is_testing(fs_info))
>               return;
>  
> -     trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
> -                                   to_free, 0);
> -
> -     btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
> +     btrfs_inode_rsv_release(inode);
>  }
>  
>  /**
> @@ -6184,25 +6092,17 @@ void btrfs_delalloc_release_extents(struct 
> btrfs_inode *inode, u64 num_bytes)
>  {
>       struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
>       unsigned num_extents;
> -     u64 to_free;
> -     unsigned dropped;
>  
>       spin_lock(&inode->lock);
>       num_extents = count_max_extents(num_bytes);
>       btrfs_mod_outstanding_extents(inode, -num_extents);
> -     dropped = drop_over_reserved_extents(inode);
> +     btrfs_calculate_inode_block_rsv_size(fs_info, inode);
>       spin_unlock(&inode->lock);
>  
> -     if (!dropped)
> -             return;
> -
>       if (btrfs_is_testing(fs_info))
>               return;
>  
> -     to_free = btrfs_calc_trans_metadata_size(fs_info, dropped);
> -     trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode),
> -                                   to_free, 0);
> -     btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free);
> +     btrfs_inode_rsv_release(inode);
>  }
>  
>  /**
> @@ -8427,7 +8327,7 @@ static void unuse_block_rsv(struct btrfs_fs_info 
> *fs_info,
>                           struct btrfs_block_rsv *block_rsv, u32 blocksize)
>  {
>       block_rsv_add_bytes(block_rsv, blocksize, 0);
> -     block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
> +     block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
>  }
>  
>  /*
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 912417b..b0d1e94 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -42,6 +42,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/posix_acl_xattr.h>
>  #include <linux/uio.h>
> +#include <linux/magic.h>
>  #include "ctree.h"
>  #include "disk-io.h"
>  #include "transaction.h"
> @@ -303,7 +304,7 @@ static noinline int cow_file_range_inline(struct 
> btrfs_root *root,
>               btrfs_free_path(path);
>               return PTR_ERR(trans);
>       }
> -     trans->block_rsv = &fs_info->delalloc_block_rsv;
> +     trans->block_rsv = &BTRFS_I(inode)->block_rsv;
>  
>       if (compressed_size && compressed_pages)
>               extent_item_size = btrfs_file_extent_calc_inline_size(
> @@ -2949,7 +2950,7 @@ static int btrfs_finish_ordered_io(struct 
> btrfs_ordered_extent *ordered_extent)
>                       trans = NULL;
>                       goto out;
>               }
> -             trans->block_rsv = &fs_info->delalloc_block_rsv;
> +             trans->block_rsv = &BTRFS_I(inode)->block_rsv;
>               ret = btrfs_update_inode_fallback(trans, root, inode);
>               if (ret) /* -ENOMEM or corruption */
>                       btrfs_abort_transaction(trans, ret);
> @@ -2985,7 +2986,7 @@ static int btrfs_finish_ordered_io(struct 
> btrfs_ordered_extent *ordered_extent)
>               goto out;
>       }
>  
> -     trans->block_rsv = &fs_info->delalloc_block_rsv;
> +     trans->block_rsv = &BTRFS_I(inode)->block_rsv;
>  
>       if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
>               compress_type = ordered_extent->compress_type;
> @@ -8834,7 +8835,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, 
> struct iov_iter *iter)
>       if (iov_iter_rw(iter) == WRITE) {
>               up_read(&BTRFS_I(inode)->dio_sem);
>               current->journal_info = NULL;
> -             btrfs_delalloc_release_extents(BTRFS_I(inode), count);
>               if (ret < 0 && ret != -EIOCBQUEUED) {
>                       if (dio_data.reserve)
>                               btrfs_delalloc_release_space(inode, 
> data_reserved,
> @@ -8855,6 +8855,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, 
> struct iov_iter *iter)
>               } else if (ret >= 0 && (size_t)ret < count)
>                       btrfs_delalloc_release_space(inode, data_reserved,
>                                       offset, count - (size_t)ret);
> +             btrfs_delalloc_release_extents(BTRFS_I(inode), count);
>       }
>  out:
>       if (wakeup)
> @@ -9419,6 +9420,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle 
> *trans,
>  
>  struct inode *btrfs_alloc_inode(struct super_block *sb)
>  {
> +     struct btrfs_fs_info *fs_info = btrfs_sb(sb);
>       struct btrfs_inode *ei;
>       struct inode *inode;
>  
> @@ -9445,8 +9447,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
>  
>       spin_lock_init(&ei->lock);
>       ei->outstanding_extents = 0;
> -     ei->reserved_extents = 0;
> -
> +     if (sb->s_magic != BTRFS_TEST_MAGIC)
> +             btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
> +                                           BTRFS_BLOCK_RSV_DELALLOC);

Why the dependence on BTRFS_TEST_MAGIC here?

>       ei->runtime_flags = 0;
>       ei->prop_compress = BTRFS_COMPRESS_NONE;
>       ei->defrag_compress = BTRFS_COMPRESS_NONE;
> @@ -9496,8 +9499,9 @@ void btrfs_destroy_inode(struct inode *inode)
>  
>       WARN_ON(!hlist_empty(&inode->i_dentry));
>       WARN_ON(inode->i_data.nrpages);
> +     WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
> +     WARN_ON(BTRFS_I(inode)->block_rsv.size);
>       WARN_ON(BTRFS_I(inode)->outstanding_extents);
> -     WARN_ON(BTRFS_I(inode)->reserved_extents);
>       WARN_ON(BTRFS_I(inode)->delalloc_bytes);
>       WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
>       WARN_ON(BTRFS_I(inode)->csum_bytes);
> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
> index 5e48b98..a9e63fb 100644
> --- a/include/trace/events/btrfs.h
> +++ b/include/trace/events/btrfs.h
> @@ -1713,28 +1713,6 @@ TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
>                       (unsigned long long)__entry->ino, __entry->mod)
>  );
>  
> -TRACE_EVENT(btrfs_inode_mod_reserved_extents,
> -     TP_PROTO(struct btrfs_root *root, u64 ino, int mod),
> -
> -     TP_ARGS(root, ino, mod),
> -
> -     TP_STRUCT__entry_btrfs(
> -             __field(        u64, root_objectid      )
> -             __field(        u64, ino                )
> -             __field(        int, mod                )
> -     ),
> -
> -     TP_fast_assign_btrfs(root->fs_info,
> -             __entry->root_objectid  = root->objectid;
> -             __entry->ino            = ino;
> -             __entry->mod            = mod;
> -     ),
> -
> -     TP_printk_btrfs("root = %llu(%s), ino = %llu, mod = %d",
> -                     show_root_type(__entry->root_objectid),
> -                     (unsigned long long)__entry->ino, __entry->mod)
> -);
> -
>  #endif /* _TRACE_BTRFS_H */
>  
>  /* This part must be outside protection */
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to