From: Eric Biggers <[email protected]>

Convert f2fs_file_write_iter() to use iomap_dio_rw() for direct I/O
rather than using f2fs_direct_IO() via __generic_file_write_iter().

This is more complicated than the read-side conversion, but it follows a
similar pattern.  Some logic in __generic_file_write_iter() needed to be
re-implemented, while other things are now handled by iomap_dio_rw().
Existing f2fs behavior, such as the conditions for falling back to
buffered I/O, is retained, except for minor details that shouldn't
matter, such as the exact time at which the timestamps are updated.

Signed-off-by: Eric Biggers <[email protected]>
---
 fs/f2fs/data.c |   7 +-
 fs/f2fs/f2fs.h |   7 +-
 fs/f2fs/file.c | 215 ++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 180 insertions(+), 49 deletions(-)
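[Not part of the patch, just a reviewer note: a rough sketch of the new
write path, using the helper names from the diff below; preallocation,
the NOWAIT handling, fault injection, and most error paths are omitted.]

static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	bool may_need_sync = true;
	ssize_t ret;

	inode_lock(inode);
	/* IS_IMMUTABLE / FI_COMPRESS_RELEASED checks, generic_write_checks(), file_modified() */
	ret = f2fs_write_checks(iocb, from);
	if (ret > 0) {
		if (f2fs_should_use_dio(inode, iocb, from))
			/* direct write via iomap_dio_rw(), with buffered fallback on a partial write */
			ret = f2fs_dio_write_iter(iocb, from, &may_need_sync);
		else
			/* buffered write via generic_perform_write() */
			ret = f2fs_buffered_write_iter(iocb, from);
	}
	inode_unlock(inode);
	/* iomap_dio_rw() already syncs a pure direct write, so skip it in that case */
	if (ret > 0 && may_need_sync)
		ret = generic_write_sync(iocb, ret);
	return ret;
}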

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9243159ee753..0d2bb651483d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1363,11 +1363,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
                f2fs_invalidate_compress_page(sbi, old_blkaddr);
        }
        f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
-       /*
-        * i_size will be updated by direct_IO. Otherwise, we'll get stale
-        * data from unwritten block via dio_read.
-        */
        return 0;
 }
 
@@ -3130,7 +3125,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
                        FS_CP_DATA_IO : FS_DATA_IO);
 }
 
-static void f2fs_write_failed(struct inode *inode, loff_t to)
+void f2fs_write_failed(struct inode *inode, loff_t to)
 {
        loff_t i_size = i_size_read(inode);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f869c4a2f79f..6dbbac05a15c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3238,10 +3238,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
        spin_lock(&sbi->iostat_lock);
        sbi->rw_iostat[type] += io_bytes;
 
-       if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
-               sbi->rw_iostat[APP_BUFFERED_IO] =
-                       sbi->rw_iostat[APP_WRITE_IO] -
-                       sbi->rw_iostat[APP_DIRECT_IO];
+       if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
+               sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
 
        if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
                sbi->rw_iostat[APP_READ_IO] += io_bytes;
@@ -3625,6 +3623,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
                                struct writeback_control *wbc,
                                enum iostat_type io_type,
                                int compr_blocks, bool allow_balance);
+void f2fs_write_failed(struct inode *inode, loff_t to);
 void f2fs_invalidate_page(struct page *page, unsigned int offset,
                        unsigned int length);
 int f2fs_release_page(struct page *page, gfp_t wait);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 52de655ef833..6b8eac6b25d4 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4292,6 +4292,29 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return ret;
 }
 
+static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       ssize_t count;
+       int err;
+
+       if (IS_IMMUTABLE(inode))
+               return -EPERM;
+
+       if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+               return -EPERM;
+
+       count = generic_write_checks(iocb, from);
+       if (count <= 0)
+               return count;
+
+       err = file_modified(file);
+       if (err)
+               return err;
+       return count;
+}
+
 /*
  * Preallocate blocks for a write request, if it is possible and helpful to do
  * so.  Returns a positive number if blocks may have been preallocated, 0 if no
@@ -4299,15 +4322,14 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
  * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
  * requested blocks (not just some of them) have been allocated.
  */
-static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
+                                  bool dio)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        const loff_t pos = iocb->ki_pos;
        const size_t count = iov_iter_count(iter);
        struct f2fs_map_blocks map = {};
-       bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
-                  !f2fs_force_buffered_io(inode, iocb, iter);
        int flag;
        int ret;
 
@@ -4352,13 +4374,153 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
        return map.m_len;
 }
 
-static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
+                                       struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
+       ssize_t ret;
+
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               return -EOPNOTSUPP;
+
+       current->backing_dev_info = inode_to_bdi(inode);
+       ret = generic_perform_write(file, from, iocb->ki_pos);
+       current->backing_dev_info = NULL;
+
+       if (ret > 0) {
+               iocb->ki_pos += ret;
+               f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+       }
+       return ret;
+}
+
+static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
+                                  bool *may_need_sync)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       struct f2fs_inode_info *fi = F2FS_I(inode);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       const bool do_opu = f2fs_lfs_mode(sbi);
+       const int whint_mode = F2FS_OPTION(sbi).whint_mode;
+       const loff_t pos = iocb->ki_pos;
+       const ssize_t count = iov_iter_count(from);
+       const enum rw_hint hint = iocb->ki_hint;
+       unsigned int dio_flags = 0;
+       ssize_t ret;
+
+       trace_f2fs_direct_IO_enter(inode, pos, count, WRITE);
+
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               /* f2fs_convert_inline_inode() and block allocation can block */
+               if (f2fs_has_inline_data(inode) ||
+                   !f2fs_overwrite_io(inode, pos, count)) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+       } else {
+               ret = f2fs_convert_inline_inode(inode);
+               if (ret)
+                       goto out;
+       }
+
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+               if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+                       up_read(&fi->i_gc_rwsem[WRITE]);
+                       ret = -EAGAIN;
+                       goto out;
+               }
+       } else {
+               down_read(&fi->i_gc_rwsem[WRITE]);
+               if (do_opu)
+                       down_read(&fi->i_gc_rwsem[READ]);
+       }
+
+       if (whint_mode == WHINT_MODE_OFF)
+               iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+       if (pos + count > inode->i_size)
+               dio_flags |= IOMAP_DIO_FORCE_WAIT;
+       ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops,
+                          dio_flags);
+       if (ret == -ENOTBLK)
+               ret = 0;
+
+       if (whint_mode == WHINT_MODE_OFF)
+               iocb->ki_hint = hint;
+
+       if (do_opu)
+               up_read(&fi->i_gc_rwsem[READ]);
+
+       up_read(&fi->i_gc_rwsem[WRITE]);
+
+       if (ret < 0) {
+               if (ret == -EIOCBQUEUED)
+                       f2fs_update_iostat(sbi, APP_DIRECT_IO,
+                                          count - iov_iter_count(from));
+               goto out;
+       }
+       if (pos + ret > inode->i_size)
+               f2fs_i_size_write(inode, pos + ret);
+       f2fs_update_iostat(sbi, APP_DIRECT_IO, ret);
+       if (!do_opu)
+               set_inode_flag(inode, FI_UPDATE_WRITE);
+
+       if (iov_iter_count(from)) {
+               ssize_t ret2;
+               loff_t bufio_start_pos = iocb->ki_pos;
+
+               /*
+                * The direct write was partial, so we need to fall back to a
+                * buffered write for the remainder.
+                */
+
+               ret2 = f2fs_buffered_write_iter(iocb, from);
+               if (iov_iter_count(from))
+                       f2fs_write_failed(inode, iocb->ki_pos);
+               if (ret2 < 0)
+                       goto out;
+
+               /*
+                * Ensure that the pagecache pages are written to disk and
+                * invalidated to preserve the expected O_DIRECT semantics.
+                */
+               if (ret2 > 0) {
+                       loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
+
+                       ret += ret2;
+
+                       ret2 = filemap_write_and_wait_range(file->f_mapping,
+                                                           bufio_start_pos,
+                                                           bufio_end_pos);
+                       if (ret2 < 0)
+                               goto out;
+                       invalidate_mapping_pages(file->f_mapping,
+                                                bufio_start_pos >> PAGE_SHIFT,
+                                                bufio_end_pos >> PAGE_SHIFT);
+               }
+       } else {
+               /* iomap_dio_rw() already handled the generic_write_sync(). */
+               *may_need_sync = false;
+       }
+out:
+       trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
+       return ret;
+}
+
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
        const loff_t orig_pos = iocb->ki_pos;
        const size_t orig_count = iov_iter_count(from);
        loff_t target_size;
+       bool dio;
+       bool may_need_sync = true;
        int preallocated;
        ssize_t ret;
 
@@ -4381,48 +4543,26 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                inode_lock(inode);
        }
 
-       if (unlikely(IS_IMMUTABLE(inode))) {
-               ret = -EPERM;
-               goto out_unlock;
-       }
-
-       if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
-               ret = -EPERM;
-               goto out_unlock;
-       }
-
-       ret = generic_write_checks(iocb, from);
+       ret = f2fs_write_checks(iocb, from);
        if (ret <= 0)
                goto out_unlock;
 
-       if (iocb->ki_flags & IOCB_NOWAIT) {
-               if (!f2fs_overwrite_io(inode, iocb->ki_pos,
-                                      iov_iter_count(from)) ||
-                   f2fs_has_inline_data(inode) ||
-                   f2fs_force_buffered_io(inode, iocb, from)) {
-                       ret = -EAGAIN;
-                       goto out_unlock;
-               }
-       }
-       if (iocb->ki_flags & IOCB_DIRECT) {
-               /*
-                * Convert inline data for Direct I/O before entering
-                * f2fs_direct_IO().
-                */
-               ret = f2fs_convert_inline_inode(inode);
-               if (ret)
-                       goto out_unlock;
-       }
+       /* Determine whether we will do a direct write or a buffered write. */
+       dio = f2fs_should_use_dio(inode, iocb, from);
 
        /* Possibly preallocate the blocks for the write. */
        target_size = iocb->ki_pos + iov_iter_count(from);
-       preallocated = f2fs_preallocate_blocks(iocb, from);
+       preallocated = f2fs_preallocate_blocks(iocb, from, dio);
        if (preallocated < 0) {
                ret = preallocated;
                goto out_unlock;
        }
 
-       ret = __generic_file_write_iter(iocb, from);
+       /* Do the actual write. */
+       if (dio)
+               ret = f2fs_dio_write_iter(iocb, from, &may_need_sync);
+       else
+               ret = f2fs_buffered_write_iter(iocb, from);
 
        /* Don't leave any preallocated blocks around past i_size. */
        if (preallocated > 0 && inode->i_size < target_size) {
@@ -4433,14 +4573,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        }
        clear_inode_flag(inode, FI_PREALLOCATED_ALL);
-
-       if (ret > 0)
-               f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
 out_unlock:
        inode_unlock(inode);
 out:
        trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
-       if (ret > 0)
+       if (ret > 0 && may_need_sync)
                ret = generic_write_sync(iocb, ret);
        return ret;
 }
-- 
2.32.0


