Original patch rh6/diff-ext4-fallocate-mode-convert-and-extend-v3

The patch introduces a new fallocate mode: FALLOC_FL_CONVERT_AND_EXTEND. It
performs two actions:
 - convert all uninitialized extents in the range <offset, offset + length>
 - set i_size to "offset + length".

The feature will be used by the ploop io_direct module to optimize the
submit_alloc path.

Changed in v2 (thanks to Dima for findings):
 - moved journal start/stop into while(){...}
 - added update_fsync_trans call

Changed in v3 (thanks again to Dima for findings):
 - protected operations on the extent tree by i_data_sem

https://jira.sw.ru/browse/PSBM-22381

Signed-off-by: Dmitry Monakhov <dmonak...@openvz.org>
---
 fs/ext4/extents.c           |  135 ++++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/falloc.h |    3 +
 2 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 606a47c..dfa4e7a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4448,6 +4448,131 @@ static void ext4_falloc_update_inode(struct inode 
*inode,
 
 }
 
+
+/*
+ * Convert the uninitialized extents covering [offset, offset + len) and raise
+ * i_size to the end of the range.  Unaligned ends give -EINVAL, an unmapped
+ * block gives -ENOENT or -ERANGE.  The extent tree is used under i_data_sem.
+ */
+static int ext4_convert_and_extend_locked(struct inode *inode, loff_t offset,
+                                         loff_t len)
+{
+       struct ext4_ext_path *path = NULL;
+       loff_t new_size = offset + len;
+       ext4_lblk_t iblock = offset >> inode->i_blkbits;
+       ext4_lblk_t new_iblock = new_size >> inode->i_blkbits;
+       unsigned int max_blocks = new_iblock - iblock;
+       handle_t *handle;
+       unsigned int credits;
+       int err = 0, ret;
+
+       if ((loff_t)iblock << inode->i_blkbits != offset ||
+           (loff_t)new_iblock << inode->i_blkbits != new_size)
+               return -EINVAL;
+
+       while (max_blocks > 0) {
+               struct ext4_extent *ex;
+               ext4_lblk_t ee_block;
+               unsigned short ee_len;
+               unsigned int step = 0;
+               int depth;
+
+               /* credits for changing the extent tree for this range */
+               credits = ext4_chunk_trans_blocks(inode, max_blocks);
+               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+
+               down_write((&EXT4_I(inode)->i_data_sem));
+
+               /* find extent for this block */
+               path = ext4_ext_find_extent(inode, iblock, NULL);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       path = NULL;    /* cleanup must not see ERR_PTR */
+                       goto unlock;
+               }
+
+               depth = ext_depth(inode);
+               ex = path[depth].p_ext;
+               BUG_ON(ex == NULL && depth != 0);
+
+               if (ex == NULL) {
+                       err = -ENOENT;
+                       goto unlock;
+               }
+
+               ee_block = le32_to_cpu(ex->ee_block);
+               ee_len = ext4_ext_get_actual_len(ex);
+               if (!in_range(iblock, ee_block, ee_len)) {
+                       err = -ERANGE;
+                       goto unlock;
+               }
+
+               /* iblock may lie inside the extent: cover only its tail */
+               step = ee_block + ee_len - iblock;
+               if (step > max_blocks)
+                       step = max_blocks;
+
+               if (ext4_ext_is_uninitialized(ex)) {
+                       struct ext4_map_blocks map = {0};
+
+                       map.m_lblk = iblock;
+                       map.m_len = max_blocks;
+                       err = ext4_convert_unwritten_extents_endio(handle,
+                                       inode, &map, path);
+                       if (err < 0)
+                               goto unlock;
+
+                       ext4_update_inode_fsync_trans(handle, inode, 1);
+                       err = check_eofblocks_fl(handle, inode, iblock, path,
+                                                max_blocks);
+               }
+unlock:
+               up_write((&EXT4_I(inode)->i_data_sem));
+
+               if (!err) {
+                       iblock += step;
+                       max_blocks -= step;
+
+                       /* the whole range is done: publish the new size */
+                       if (!max_blocks && new_size > i_size_read(inode)) {
+                               i_size_write(inode, new_size);
+                               ext4_update_i_disksize(inode, new_size);
+                       }
+                       err = ext4_mark_inode_dirty(handle, inode);
+               }
+
+               if (path) {
+                       ext4_ext_drop_refs(path);
+                       kfree(path);
+               }
+
+               ret = ext4_journal_stop(handle);
+               if (!err && ret)
+                       err = ret;
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* CAP_SYS_ADMIN-only entry point; takes i_mutex around the conversion. */
+static int ext4_convert_and_extend(struct inode *inode, loff_t offset,
+                                  loff_t len)
+{
+       int rc = -EACCES;
+
+       if (capable(CAP_SYS_ADMIN)) {
+               mutex_lock(&inode->i_mutex);
+               rc = ext4_convert_and_extend_locked(inode, offset, len);
+               mutex_unlock(&inode->i_mutex);
+       }
+
+       return rc;
+}
+
 /*
  * preallocate space for a file. This implements ext4's fallocate file
  * operation, which gets called from sys_fallocate system call.
@@ -4474,20 +4599,26 @@ long ext4_fallocate(struct file *file, int mode, loff_t 
offset, loff_t len)
         */
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
-
+ 
        /* Return error if mode is not supported */
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+                    FALLOC_FL_CONVERT_AND_EXTEND))
                return -EOPNOTSUPP;
 
        if (mode & FALLOC_FL_PUNCH_HOLE)
                return ext4_punch_hole(file, offset, len);
 
+       if (mode & FALLOC_FL_CONVERT_AND_EXTEND)
+               return ext4_convert_and_extend(inode, offset, len);
+
        ret = ext4_convert_inline_data(inode);
        if (ret)
                return ret;
 
        trace_ext4_fallocate_enter(inode, offset, len, mode);
        map.m_lblk = offset >> blkbits;
+
+
        /*
         * We can't just convert len to max_blocks because
         * If blocksize = 4096 offset = 3072 and len = 2048
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 990c4cc..17b618f 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -4,6 +4,9 @@
 #define FALLOC_FL_KEEP_SIZE    0x01 /* default is extend size */
 #define FALLOC_FL_PUNCH_HOLE   0x02 /* de-allocates range */
 #define FALLOC_FL_NO_HIDE_STALE        0x04 /* reserved codepoint */
+#define FALLOC_FL_CONVERT_AND_EXTEND 0x100 /* convert uninitialized extents
+                                           * in range, set i_size to its end */
+
 
 
 #endif /* _UAPI_FALLOC_H_ */
-- 
1.7.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to