This allows us to have an in-kernel copy mechanism that avoids frequent
switches between kernel and user space.  This is especially useful so
NFSD can support server-side copies.

The default (flags=0) means to first attempt copy acceleration, but use
the pagecache if that fails.

I moved the rw_verify_area() calls into the fallback code since some
filesystems can handle reflinking a large range.

Signed-off-by: Anna Schumaker <anna.schuma...@netapp.com>
Reviewed-by: Darrick J. Wong <darrick.w...@oracle.com>
Reviewed-by: Padraig Brady <p...@draigbrady.com>
---
v6:
- Don't reflink by default.
- Reword commit message.
---
 fs/read_write.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 20f147d..1fd555c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1329,6 +1329,24 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
 }
 #endif
 
+static ssize_t vfs_copy_fr_copy(struct file *file_in, loff_t pos_in,
+                               struct file *file_out, loff_t pos_out,
+                               size_t len)
+{
+       ssize_t ret = rw_verify_area(READ, file_in, &pos_in, len);
+
+       if (ret >= 0) {
+               len = ret;
+               ret = rw_verify_area(WRITE, file_out, &pos_out, len);
+               if (ret >= 0)
+                       len = ret;
+       }
+       if (ret < 0)
+               return ret;
+
+       return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len, 0);
+}
+
 /*
  * copy_file_range() differs from regular file read and write in that it
  * specifically allows return partial success.  When it does so is up to
@@ -1345,17 +1363,10 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
        if (flags & ~(COPY_FR_REFLINK))
                return -EINVAL;
 
-       /* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT  */
-       ret = rw_verify_area(READ, file_in, &pos_in, len);
-       if (ret >= 0)
-               ret = rw_verify_area(WRITE, file_out, &pos_out, len);
-       if (ret < 0)
-               return ret;
-
        if (!(file_in->f_mode & FMODE_READ) ||
            !(file_out->f_mode & FMODE_WRITE) ||
            (file_out->f_flags & O_APPEND) ||
-           !file_out->f_op || !file_out->f_op->copy_file_range)
+           !file_out->f_op)
                return -EBADF;
 
        /* this could be relaxed once a method supports cross-fs copies */
@@ -1370,8 +1381,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
        if (ret)
                return ret;
 
-       ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out,
-                                             len, flags);
+       ret = -EOPNOTSUPP;
+       if (file_out->f_op->copy_file_range)
+               ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
+                                                     pos_out, len, flags);
+       if ((ret == -EOPNOTSUPP) && !(flags & COPY_FR_REFLINK))
+               ret = vfs_copy_fr_copy(file_in, pos_in, file_out, pos_out, len);
+
        if (ret > 0) {
                fsnotify_access(file_in);
                add_rchar(current, ret);
-- 
2.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-api" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to