On Sun, 23 Jun 2013 15:07:36 +0900, Namjae Jeon <[email protected]> wrote:
> From: Namjae Jeon <[email protected]>
What is the difference between this ioctl and generic punch_hole?
> 
> The EXT4_IOC_TRUNCATE_BLOCK_RANGE removes the data blocks lying
> between [start, "start + length") and updates the logical block numbers
> of data blocks starting from "start + length" block to last block of file.
> This will maintain contiguous nature of logical block numbers
> after block removal.
> Both the inode's disksize and logical size are updated after block
> removal
> 
> Signed-off-by: Namjae Jeon <[email protected]>
> Signed-off-by: Ashish Sangwan <[email protected]>
> ---
>  fs/ext4/ext4.h         |    8 ++
>  fs/ext4/ext4_extents.h |    3 +
>  fs/ext4/extents.c      |  245 ++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/ext4/ioctl.c        |   62 ++++++++++++
>  4 files changed, 318 insertions(+)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 6ed348d..df2c411 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -590,6 +590,7 @@ enum {
>  #define EXT4_IOC_MOVE_EXT            _IOWR('f', 15, struct move_extent)
>  #define EXT4_IOC_RESIZE_FS           _IOW('f', 16, __u64)
>  #define EXT4_IOC_SWAP_BOOT           _IO('f', 17)
> +#define EXT4_IOC_TRUNCATE_BLOCK_RANGE        _IOW('f', 18, struct truncate_range)
>  
>  #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
>  /*
> @@ -682,6 +683,11 @@ struct move_extent {
>       __u64 moved_len;        /* moved block length */
>  };
>  
> +struct truncate_range {
> +     __u32 start_block;
> +     __u32 length;
> +};
> +
>  #define EXT4_EPOCH_BITS 2
>  #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
>  #define EXT4_NSEC_MASK  (~0UL << EXT4_EPOCH_BITS)
> @@ -2692,6 +2698,8 @@ extern int ext4_find_delalloc_range(struct inode *inode,
>  extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
>  extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>                       __u64 start, __u64 len);
> +extern int ext4_ext_truncate_range(struct inode *inode, ext4_lblk_t start,
> +                                ext4_lblk_t end, ext4_lblk_t last_block);
>  
>  
>  /* move_extent.c */
> diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
> index 51bc821..cc113cc 100644
> --- a/fs/ext4/ext4_extents.h
> +++ b/fs/ext4/ext4_extents.h
> @@ -178,6 +178,9 @@ struct ext4_ext_path {
>  #define EXT_MAX_INDEX(__hdr__) \
>       (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
>  
> +#define EXTENT_START_FLAG    0x1
> +#define INDEX_START_FLAG     0x2
> +
>  static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
>  {
>       return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 937593e..ed85e34 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4757,3 +4757,248 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>  
>       return error;
>  }
> +
> +/*
> + * ext4_trange_dirty_path: Function to mark the path buffer dirty.
> + * It also checks if there are sufficient credits left in the
> + * journal to update metadata. If the number of credits is insufficient,
> + * restart the handle with additional credits.
> + *
> + * @handle: journal handle
> + * @inode: file inode
> + * @path: pointer to path
> + * @num: number of additional inodes passed as variadic arguments
> + *
> + * Returns: 0 on success or negative value on error
> + */
> +int ext4_trange_dirty_path(handle_t *handle, struct inode *inode,
> +                        struct ext4_ext_path *path,
> +                        int num, ...)
> +{
> +     int credits, err, i;
> +     struct inode *iptr;
> +     va_list args;
> +
> +     /*
> +      * Check if need to extend journal credits
> +      * 3 for leaf, sb, and inode plus 2 (bmap and group
> +      * descriptor) for each block group; assume two block
> +      * groups
> +      */
> +     if (handle->h_buffer_credits < 7*(num + 1)) {
> +             credits = ext4_writepage_trans_blocks(inode);
> +             va_start(args, num);
> +             for (i = 1; i <= num; i++) {
> +                     iptr = va_arg(args, struct inode *);
> +                     credits += ext4_writepage_trans_blocks(iptr);
> +             }
> +             va_end(args);
> +             err = ext4_ext_truncate_extend_restart(handle, inode, credits);
> +             /* EAGAIN is success */
> +             if (err && err != -EAGAIN)
> +                     return err;
> +     }
> +     err = ext4_ext_get_access(handle, inode, path);
> +     return err;
> +}
> +
> +/*
> + * ext4_ext_update_path: update the extents of a path structure
> + * lying between path[depth].p_ext and EXT_LAST_EXTENT(path[depth].p_hdr)
> + * subtracting shift from starting block for each extent.
> + *
> + * @path: path for which extents are updated
> + * @shift: Number of blocks to be subtracted from first logical block
> + * that extent covers for each extent.
> + * @inode: file inode
> + * @handle: journal handle
> + * @start_block: Points to the starting block of next extent which is
> + * to be updated.
> + *
> + * Returns: 0 on success or negative on error.
> + */
> +int ext4_ext_update_path(struct ext4_ext_path *path, ext4_lblk_t shift,
> +                      struct inode *inode, handle_t *handle,
> +                      ext4_lblk_t *start_block)
> +{
> +     int depth, err = 0, flag = 0;
> +     struct ext4_extent *ex_start, *ex_last;
> +
> +     depth = path->p_depth;
> +     while (depth >= 0) {
> +             if (depth == path->p_depth) {
> +                     ex_start = path[depth].p_ext;
> +                     if (!ex_start)
> +                             return -EIO;
> +
> +                     err = ext4_trange_dirty_path(handle, inode,
> +                                                  path + depth, 0);
> +                     if (err)
> +                             goto out;
> +
> +                     if (path[depth].p_ext ==
> +                             EXT_FIRST_EXTENT(path[depth].p_hdr))
> +                             flag |= EXTENT_START_FLAG;
> +
> +                     ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
> +                     while (ex_start <= ex_last) {
> +                             *start_block = ex_start->ee_block +
> +                                     ext4_ext_get_actual_len(ex_start);
> +                             ex_start->ee_block -= shift;
> +                             ex_start++;
> +                     }
> +                     err = ext4_ext_dirty(handle, inode, path + depth);
> +                     if (err)
> +                             goto out;
> +             } else {
> +                     /* If encountered starting extent, update index too */
> +                     if (path->p_depth - depth == 1) {
> +                             if (flag & EXTENT_START_FLAG) {
> +                                     /* Update index too */
> +                                     err = ext4_trange_dirty_path(handle,
> +                                                 inode, path + depth, 0);
> +                                     if (err)
> +                                             goto out;
> +                                     path[depth].p_idx->ei_block -= shift;
> +                                     err = ext4_ext_dirty(handle, inode,
> +                                                          path + depth);
> +                                     if (err)
> +                                             goto out;
> +                                     flag &= ~EXTENT_START_FLAG;
> +                             } else
> +                                     /* No need to update any extent index */
> +                                     break;
> +                     }
> +                     /* Check, if earlier encountered starting index */
> +                     if (flag & INDEX_START_FLAG) {
> +                             err = ext4_trange_dirty_path(handle, inode,
> +                                                     path + (depth), 0);
> +                             if (err)
> +                                     goto out;
> +                             path[depth].p_idx->ei_block -= shift;
> +                             err = ext4_ext_dirty(handle, inode,
> +                                                  path + depth);
> +                             if (err)
> +                                     goto out;
> +                             flag &= ~INDEX_START_FLAG;
> +                     }
> +                     /* Check if this is a starting index */
> +                     if (path[depth].p_idx ==
> +                         EXT_FIRST_INDEX(path[depth].p_hdr)) {
> +                             /* starting of a block */
> +                             flag |= INDEX_START_FLAG;
> +                     } else
> +                             break;
> +             }
> +             depth--;
> +     }
> +out:
> +     return err;
> +}
> +
> +/*
> + * ext4_ext_update_logical: update logical blocks ranging from start
> + * to the end block for inode by moving them shift blocks to the left
> + *
> + * @inode: file inode
> + * @handle: journal handle
> + * @start_block: starting block for the block update
> + * @shift: number of blocks to be shifted
> + * @end_block: last block to be updated
> + *
> + * Returns: 0 on success or negative on failure
> + */
> +static int ext4_ext_update_logical(struct inode *inode, handle_t *handle,
> +                                ext4_lblk_t start_block, ext4_lblk_t shift,
> +                                ext4_lblk_t end_block)
> +{
> +     struct ext4_ext_path *path;
> +     int err = 0;
> +
> +     while (start_block < end_block) {
> +             path = ext4_ext_find_extent(inode, start_block, NULL);
> +             if (IS_ERR(path)) {
> +                     err = PTR_ERR(path);
> +                     break;
> +             }
> +             err = ext4_ext_update_path(path, shift, inode,
> +                                        handle, &start_block);
> +             ext4_ext_drop_refs(path);
> +             kfree(path);
> +             if (err)
> +                     break;
> +     }
> +     return err;
> +}
> +
> +/*
> + * ext4_ext_truncate_range: truncate the block range from start
> + * block to end block including the end block from inode.
> + *
> + * @inode: file inode
> + * @start: start block
> + * @end: end block
> + * @last_block: last block number of the inode
> + *
> + * Returns: 0 on success or negative on error
> + */
> +int ext4_ext_truncate_range(struct inode *inode, ext4_lblk_t start,
> +                         ext4_lblk_t end, ext4_lblk_t last_block)
> +{
> +     int ret, credits;
> +     ext4_lblk_t shift = end - start + 1;
> +     handle_t *handle;
> +     loff_t isize_reduced;
> +     int blkbits = inode->i_blkbits;
> +     struct address_space *mapping = inode->i_mapping;
> +
> +     /* sync dirty pages for transfer */
> +     if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
> +             ret = filemap_write_and_wait_range(mapping,
> +                             (loff_t)start << blkbits,
> +                             ((loff_t)(last_block + 1) << blkbits) - 1);
> +             if (ret)
> +                     return ret;
> +     }
> +     truncate_inode_pages_range(inode->i_mapping,
> +                                start << inode->i_blkbits, -1);
> +     ext4_inode_block_unlocked_dio(inode);
> +     inode_dio_wait(inode);
> +     down_write(&EXT4_I(inode)->i_data_sem);
> +     ext4_discard_preallocations(inode);
> +     ret = ext4_es_remove_extent(inode, start, end - start + 1);
> +     if (ret)
> +             goto out;
> +
> +     credits = ext4_writepage_trans_blocks(inode);
> +     handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
> +     if (IS_ERR(handle)) {
> +             ret = PTR_ERR(handle);
> +             goto out;
> +     }
> +
> +     ret = ext4_ext_remove_space(inode, start, end);
> +     if (ret)
> +             goto journal_stop;
> +
> +     ext4_discard_preallocations(inode);
> +
> +     if (end < last_block) {
> +             ret = ext4_ext_update_logical(inode, handle, end + 1,
> +                                           shift, last_block + 1);
> +             if (ret)
> +                     goto journal_stop;
> +     }
> +     isize_reduced = (loff_t)shift << blkbits;
> +     i_size_write(inode, inode->i_size - isize_reduced);
> +     EXT4_I(inode)->i_disksize -= isize_reduced;
> +     inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
> +     ext4_mark_inode_dirty(handle, inode);
> +journal_stop:
> +     ext4_journal_stop(handle);
> +out:
> +     ext4_inode_resume_unlocked_dio(inode);
> +     up_write(&EXT4_I(inode)->i_data_sem);
> +     return ret;
> +}
> +
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 9491ac0..0530daf 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -622,6 +622,68 @@ resizefs_out:
>  
>               return 0;
>       }
> +     case EXT4_IOC_TRUNCATE_BLOCK_RANGE:
> +     {
> +             struct truncate_range tr;
> +             ext4_lblk_t last_block, end_block;
> +             int error;
> +             loff_t i_size = i_size_read(inode);
> +
> +             if (!i_size)
> +                     return 0;
> +
> +             if (!(filp->f_mode & FMODE_WRITE))
> +                     return -EBADF;
> +
> +             if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
> +                     return -EPERM;
> +
> +             if (!S_ISREG(inode->i_mode))
> +                     return -EOPNOTSUPP;
> +
> +             if (IS_SWAPFILE(inode))
> +                     return -EOPNOTSUPP;
> +
> +             if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
> +                     return -EOPNOTSUPP;
> +
> +             if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
> +                 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
> +                     ext4_msg(sb, KERN_ERR,
> +                      "Truncate block range not supported with bigalloc");
> +                     return -EOPNOTSUPP;
> +             }
> +
> +             if (copy_from_user(&tr, (const void *) arg,
> +                             sizeof(struct truncate_range)))
> +                     return -EFAULT;
> +
> +             if (!tr.length)
> +                     return -EINVAL;
> +
> +             end_block = tr.start_block + tr.length - 1;
> +
> +             last_block = ((round_up(i_size,
> +                                     EXT4_BLOCK_SIZE(inode->i_sb)))
> +                           >> inode->i_blkbits) - 1;
> +             if (tr.start_block > end_block ||
> +                 tr.start_block > last_block)
> +                     return -EINVAL;
> +
> +             if (end_block > last_block)
> +                     end_block = last_block;
> +
> +             error = mnt_want_write_file(filp);
> +             if (error)
> +                     return error;
> +
> +             mutex_lock(&inode->i_mutex);
> +             error = ext4_ext_truncate_range(inode, tr.start_block,
> +                                             end_block, last_block);
> +             mutex_unlock(&inode->i_mutex);
> +             mnt_drop_write_file(filp);
> +             return error;
> +     }
>  
>       default:
>               return -ENOTTY;
> -- 
> 1.7.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to