Hi, Qu,

On 2015/09/08 18:22, Qu Wenruo wrote:
> Now fallocate does an accurate qgroup reserved-space check, unlike the
> old method, which always reserved the whole length of the range.
> 
> With this patch, fallocate will:
> 1) Iterate the desired range and mark it in the data rsv map
>     Only ranges which are going to be allocated are recorded in the
>     data rsv map and have their space reserved.
>     Already allocated ranges (normal/prealloc extents) are skipped.
>     Also, record the marked ranges in a new list for later use.
> 
> 2) If 1) succeeded, do the real file extent allocation.
>     At file extent allocation time, the corresponding range is removed
>     from the data rsv map.
> 
> Signed-off-by: Qu Wenruo <quwen...@cn.fujitsu.com>
> ---
>   fs/btrfs/file.c | 147 +++++++++++++++++++++++++++++++++++++++++---------------
>   1 file changed, 107 insertions(+), 40 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index c1eec4f..26e59bc 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2545,17 +2545,61 @@ out_only_mutex:
>       return err;
>   }
>   
> +/* Helper structure to record which range is already reserved */
> +struct falloc_range {
> +     struct list_head list;
> +     u64 start;
> +     u64 len;
> +};
> +
> +/*
> + * Helper function to add falloc range
> + *
> + * Caller should have locked the larger range of extent containing
> + * [start, len)
> + */
> +static int add_falloc_range(struct list_head *head, u64 start, u64 len)
> +{
> +     struct falloc_range *prev = NULL;
> +     struct falloc_range *range = NULL;
> +
> +     if (list_empty(head))
> +             goto insert;
> +
> +     /*
> +      * As fallocate iterates in bytenr order, we only need to check
> +      * the last range.
> +      */
> +     prev = list_entry(head->prev, struct falloc_range, list);
> +     if (prev->start + prev->len == start) {
> +             prev->len += len;
> +             return 0;
> +     }
> +insert:
> +     range = kmalloc(sizeof(*range), GFP_NOFS);
> +     if (!range)
> +             return -ENOMEM;
> +     range->start = start;
> +     range->len = len;
> +     list_add_tail(&range->list, head);
> +     return 0;
> +}
> +
>   static long btrfs_fallocate(struct file *file, int mode,
>                           loff_t offset, loff_t len)
>   {
>       struct inode *inode = file_inode(file);
>       struct extent_state *cached_state = NULL;
> +     struct falloc_range *range;
> +     struct falloc_range *tmp;
> +     struct list_head reserve_list;
>       u64 cur_offset;
>       u64 last_byte;
>       u64 alloc_start;
>       u64 alloc_end;
>       u64 alloc_hint = 0;
>       u64 locked_end;
> +     u64 actual_end = 0;
>       struct extent_map *em;
>       int blocksize = BTRFS_I(inode)->root->sectorsize;
>       int ret;
> @@ -2571,10 +2615,11 @@ static long btrfs_fallocate(struct file *file, int mode,
>               return btrfs_punch_hole(inode, offset, len);
>   
>       /*
> -      * Make sure we have enough space before we do the
> -      * allocation.
> +      * Only trigger disk allocation, don't trigger qgroup reserve
> +      *
> +      * For qgroup space, it will be checked later.
>        */
> -     ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
> +     ret = btrfs_alloc_data_chunk_ondemand(inode, alloc_end - alloc_start);
>       if (ret)
>               return ret;
>   
> @@ -2583,6 +2628,13 @@ static long btrfs_fallocate(struct file *file, int mode,
>       if (ret)
>               goto out;
>   
> +     /*
> +      * TODO: Move these two operations after we have checked the
> +      * accurate reserved space, or fallocate can still fail with
> +      * the page truncated or the size expanded.
> +      *
> +      * But that's a minor problem and won't do much harm anyway.
> +      */
>       if (alloc_start > inode->i_size) {
>               ret = btrfs_cont_expand(inode, i_size_read(inode),
>                                       alloc_start);
> @@ -2641,10 +2693,10 @@ static long btrfs_fallocate(struct file *file, int mode,
>               }
>       }
>   
> +     /* First, check if we exceed the qgroup limit */
> +     INIT_LIST_HEAD(&reserve_list);
>       cur_offset = alloc_start;
>       while (1) {
> -             u64 actual_end;
> -
>               em = btrfs_get_extent(inode, NULL, 0, cur_offset,
>                                     alloc_end - cur_offset, 0);
>               if (IS_ERR_OR_NULL(em)) {
> @@ -2657,54 +2709,69 @@ static long btrfs_fallocate(struct file *file, int mode,
>               last_byte = min(extent_map_end(em), alloc_end);
>               actual_end = min_t(u64, extent_map_end(em), offset + len);
>               last_byte = ALIGN(last_byte, blocksize);
> -
>               if (em->block_start == EXTENT_MAP_HOLE ||
>                   (cur_offset >= inode->i_size &&
>                    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
> -                     ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
> -                                                     last_byte - cur_offset,
> -                                                     1 << inode->i_blkbits,
> -                                                     offset + len,
> -                                                     &alloc_hint);
> -             } else if (actual_end > inode->i_size &&
> -                        !(mode & FALLOC_FL_KEEP_SIZE)) {
> -                     struct btrfs_trans_handle *trans;
> -                     struct btrfs_root *root = BTRFS_I(inode)->root;
> -
> -                     /*
> -                      * We didn't need to allocate any more space, but we
> -                      * still extended the size of the file so we need to
> -                      * update i_size and the inode item.
> -                      */
> -                     trans = btrfs_start_transaction(root, 1);
> -                     if (IS_ERR(trans)) {
> -                             ret = PTR_ERR(trans);
> -                     } else {
> -                             inode->i_ctime = CURRENT_TIME;
> -                             i_size_write(inode, actual_end);
> -                             btrfs_ordered_update_i_size(inode, actual_end,
> -                                                         NULL);
> -                             ret = btrfs_update_inode(trans, root, inode);
> -                             if (ret)
> -                                     btrfs_end_transaction(trans, root);
> -                             else
> -                                     ret = btrfs_end_transaction(trans,
> -                                                                 root);
> +                     ret = add_falloc_range(&reserve_list, cur_offset,
> +                                            last_byte - cur_offset);
> +                     if (ret < 0) {
> +                             free_extent_map(em);
> +                             goto out;
>                       }
> +                     ret = btrfs_qgroup_reserve_data(inode, cur_offset,
> +                                     last_byte - cur_offset);
>               }
>               free_extent_map(em);
> -             if (ret < 0)
> -                     break;
> -
>               cur_offset = last_byte;
> -             if (cur_offset >= alloc_end) {
> -                     ret = 0;
> +             if (cur_offset >= alloc_end)
>                       break;
> +     }
> +     if (ret < 0)
> +             goto out;
> +
> +     /* Now we are sure qgroup has reserved enough space */
> +     list_for_each_entry_safe(range, tmp, &reserve_list, list) {
> +             ret = btrfs_prealloc_file_range(inode, mode, range->start,
> +                             range->len, 1 << inode->i_blkbits,
> +                             offset + len, &alloc_hint);
> +             if (ret < 0)
> +                     goto out;
> +     }
> +     if (actual_end > inode->i_size &&
> +         !(mode & FALLOC_FL_KEEP_SIZE)) {
> +             struct btrfs_trans_handle *trans;
> +             struct btrfs_root *root = BTRFS_I(inode)->root;
> +
> +             /*
> +              * We didn't need to allocate any more space, but we
> +              * still extended the size of the file so we need to
> +              * update i_size and the inode item.
> +              */
> +             trans = btrfs_start_transaction(root, 1);
> +             if (IS_ERR(trans)) {
> +                     ret = PTR_ERR(trans);
> +             } else {
> +                     inode->i_ctime = CURRENT_TIME;
> +                     i_size_write(inode, actual_end);
> +                     btrfs_ordered_update_i_size(inode, actual_end, NULL);
> +                     ret = btrfs_update_inode(trans, root, inode);
> +                     if (ret)
> +                             btrfs_end_transaction(trans, root);
> +                     else
> +                             ret = btrfs_end_transaction(trans, root);
>               }
>       }
>       unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
>                            &cached_state, GFP_NOFS);
>   out:
> +     /*
> +      * As we have waited for the extent range, the data_rsv_map must be
> +      * empty in the range, since any written data range will have been
> +      * released from it.

> +      * And for prelloacted extent, it will also be released when

                   preallocated

Thanks,
Tsutomu

> +      * its metadata is written.
> +      * So this is used purely as a cleanup.
> +      */
> +     btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
>       mutex_unlock(&inode->i_mutex);
>       /* Let go of our reservation. */
>       btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
> 
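For context on the case the commit message describes (only the holes in the
requested range should take a new qgroup data reservation, while already
allocated extents are skipped), a minimal userspace sketch that exercises this
path could look like the one below. The mount point, file name and sizes are
illustrative assumptions, not part of the patch, and quotas are assumed to be
enabled on the filesystem (btrfs quota enable on the mount).

/*
 * Sketch only: fallocate over a range whose first half is already
 * written.  With this patch, only the second (unallocated) half should
 * go through btrfs_qgroup_reserve_data(); with the old method, the
 * whole 8 MiB was reserved against the qgroup up front.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/mnt/btrfs/falloc_test";	/* assumed btrfs mount */
	char buf[4096];
	int fd, i;

	fd = open(path, O_CREAT | O_RDWR | O_TRUNC, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Write the first 4 MiB so that part of the range already has extents. */
	memset(buf, 0xaa, sizeof(buf));
	for (i = 0; i < 1024; i++) {
		if (pwrite(fd, buf, sizeof(buf), (off_t)i * sizeof(buf)) < 0) {
			perror("pwrite");
			return 1;
		}
	}
	fsync(fd);

	/* Fallocate 8 MiB from offset 0: only the hole needs reserving. */
	if (fallocate(fd, 0, 0, 8 * 1024 * 1024) < 0) {
		perror("fallocate");
		return 1;
	}

	close(fd);
	return 0;
}

This is only meant to illustrate which part of the range the new per-range
reservation applies to, not to be a test case for the patch itself.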

