[PATCH v2 2/2] libstdc++: use copy_file_range, improve sendfile in filesystem::copy_file
From c028a0072c7573cfac90289b3606ba19cb8272a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= Date: Wed, 8 Mar 2023 19:37:43 +0100 Subject: [PATCH 2/2] libstdc++: use copy_file_range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit copy_file_range is a recent-ish syscall for copying files. It is similar to sendfile but allows filesystem-specific optimizations. Common are: Reflinks: BTRFS, XFS, ZFS (does not implement the syscall yet) Server-side copy: NFS, SMB, Ceph If copy_file_range is not available for the given files, fall back to sendfile / userspace copy. libstdc++-v3/ChangeLog: * acinclude.m4 (_GLIBCXX_USE_COPY_FILE_RANGE): define * config.h.in: Regenerate. * configure: Regenerate. * src/filesystem/ops-common.h: use copy_file_range in std::filesystem::copy_file Signed-off-by: Jannik Glückert --- libstdc++-v3/acinclude.m4| 20 libstdc++-v3/config.h.in | 3 ++ libstdc++-v3/configure | 62 libstdc++-v3/src/filesystem/ops-common.h | 50 +++ 4 files changed, 135 insertions(+) diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index 85a09a5a869..4cf02dc6e4e 100644 --- a/libstdc++-v3/acinclude.m4 +++ b/libstdc++-v3/acinclude.m4 @@ -4581,6 +4581,7 @@ dnl _GLIBCXX_USE_UTIMENSAT dnl _GLIBCXX_USE_ST_MTIM dnl _GLIBCXX_USE_FCHMOD dnl _GLIBCXX_USE_FCHMODAT +dnl _GLIBCXX_USE_COPY_FILE_RANGE dnl _GLIBCXX_USE_SENDFILE dnl HAVE_LINK dnl HAVE_LSEEK @@ -4779,6 +4780,25 @@ dnl if test $glibcxx_cv_truncate = yes; then AC_DEFINE(HAVE_TRUNCATE, 1, [Define if truncate is available in .]) fi +dnl + AC_CACHE_CHECK([for copy_file_range that can copy files], +glibcxx_cv_copy_file_range, [dnl +case "${target_os}" in + linux*) + GCC_TRY_COMPILE_OR_LINK( + [#include ], + [copy_file_range(1, nullptr, 2, nullptr, 1, 0);], + [glibcxx_cv_copy_file_range=yes], + [glibcxx_cv_copy_file_range=no]) + ;; + *) + glibcxx_cv_copy_file_range=no + ;; +esac + ]) + if test $glibcxx_cv_copy_file_range = yes; then 
+AC_DEFINE(_GLIBCXX_USE_COPY_FILE_RANGE, 1, [Define if copy_file_range is available in <unistd.h>.]) + fi dnl AC_CACHE_CHECK([for sendfile that can copy files], glibcxx_cv_sendfile, [dnl diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h index 9e1b1d41dc5..202728baef2 100644 --- a/libstdc++-v3/src/filesystem/ops-common.h +++ b/libstdc++-v3/src/filesystem/ops-common.h @@ -49,6 +49,9 @@ #ifdef NEED_DO_COPY_FILE # include <filesystem> # include <ext/stdio_filebuf.h> +# ifdef _GLIBCXX_USE_COPY_FILE_RANGE +# include <unistd.h> // copy_file_range +# endif # ifdef _GLIBCXX_USE_SENDFILE # include <sys/sendfile.h> // sendfile # include <unistd.h> // lseek @@ -359,6 +362,31 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM } #ifdef NEED_DO_COPY_FILE +#ifdef _GLIBCXX_USE_COPY_FILE_RANGE + bool + copy_file_copy_file_range(int fd_in, int fd_out, size_t length) noexcept + { +// a zero-length file is either empty, or not copyable by this syscall +// return early to avoid the syscall cost +if (length == 0) + { +errno = EINVAL; +return false; + } +size_t bytes_left = length; +off64_t off_in = 0, off_out = 0; +ssize_t bytes_copied; +do { + bytes_copied = ::copy_file_range(fd_in, &off_in, fd_out, &off_out, bytes_left, 0); + bytes_left -= bytes_copied; +} while (bytes_left > 0 && bytes_copied > 0); +if (bytes_copied < 0) + { +return false; + } +return true; + } +#endif #if defined _GLIBCXX_USE_SENDFILE && ! 
defined _GLIBCXX_FILESYSTEM_IS_WINDOWS bool copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept @@ -527,6 +555,28 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM bool has_copied = false; +#ifdef _GLIBCXX_USE_COPY_FILE_RANGE +if (!has_copied) + has_copied = copy_file_copy_file_range(in.fd, out.fd, from_st->st_size); +if (!has_copied) + { +// EINVAL: src and dst are the same file (this is not cheaply detectable from userspace) +// EINVAL: copy_file_range is unsupported for this file type by the underlying filesystem +// ENOTSUP: undocumented, can arise with old kernels and NFS +// EOPNOTSUPP: filesystem does not implement copy_file_range +// ETXTBSY: src or dst is an active swapfile (nonsensical, but allowed with normal copying) +// EXDEV: src and dst are on different filesystems that do not support cross-fs copy_file_range +// ENOENT: undocumented, can arise with CIFS +// ENOSYS: unsupported by kernel or blocked by seccomp +if (errno != EINVAL && errno != ENOTSUP && errno != EOPNOTSUPP +&& errno != ETXTBSY && errno != EXDEV && errno != ENOENT && errno != ENOSYS) + { +ec.assign(errno, std::generic_category()); +return false; + } + } +#endif
[PATCH v2 1/2] libstdc++: use copy_file_range, improve sendfile in filesystem::copy_file
This iteration improves error handling for copy_file_range, particularly around undocumented error codes in earlier kernel versions. Additionally, this fixes the userspace copy fallback to handle zero-length files, as reported in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108178. Lastly, the case "src gets resized during the copy loop" is now handled: the copy returns true once the loop hits EOF (this is the only situation, aside from a zero-length src, where sendfile and copy_file_range return 0). Best Jannik From b55eb8dccaa44f07d8acbe6294326a46c920b04f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= Date: Mon, 6 Mar 2023 20:52:08 +0100 Subject: [PATCH 1/2] libstdc++: also use sendfile for big files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were previously only using sendfile for files smaller than 2GB, as sendfile needs to be called repeatedly for files bigger than that. Some quick numbers, copying a 16GB file, average of 10 repetitions: old: real: 13.4s user: 0.14s sys : 7.43s new: real: 8.90s user: 0.00s sys : 3.68s Additionally, this fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108178 libstdc++-v3/ChangeLog: * acinclude.m4 (_GLIBCXX_HAVE_LSEEK): define * config.h.in: Regenerate. * configure: Regenerate. 
* src/filesystem/ops-common.h: enable sendfile for files >2GB in std::filesystem::copy_file, skip zero-length files Signed-off-by: Jannik Glückert --- libstdc++-v3/acinclude.m4| 51 + libstdc++-v3/config.h.in | 3 + libstdc++-v3/configure | 127 --- libstdc++-v3/src/filesystem/ops-common.h | 86 --- 4 files changed, 175 insertions(+), 92 deletions(-) diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index 5136c0571e8..85a09a5a869 100644 --- a/libstdc++-v3/acinclude.m4 +++ b/libstdc++-v3/acinclude.m4 @@ -4583,6 +4583,7 @@ dnl _GLIBCXX_USE_FCHMOD dnl _GLIBCXX_USE_FCHMODAT dnl _GLIBCXX_USE_SENDFILE dnl HAVE_LINK +dnl HAVE_LSEEK dnl HAVE_READLINK dnl HAVE_SYMLINK dnl @@ -4718,25 +4719,6 @@ dnl if test $glibcxx_cv_fchmodat = yes; then AC_DEFINE(_GLIBCXX_USE_FCHMODAT, 1, [Define if fchmodat is available in .]) fi -dnl - AC_CACHE_CHECK([for sendfile that can copy files], -glibcxx_cv_sendfile, [dnl -case "${target_os}" in - gnu* | linux* | solaris* | uclinux*) - GCC_TRY_COMPILE_OR_LINK( - [#include ], - [sendfile(1, 2, (off_t*)0, sizeof 1);], - [glibcxx_cv_sendfile=yes], - [glibcxx_cv_sendfile=no]) - ;; - *) - glibcxx_cv_sendfile=no - ;; -esac - ]) - if test $glibcxx_cv_sendfile = yes; then -AC_DEFINE(_GLIBCXX_USE_SENDFILE, 1, [Define if sendfile is available in .]) - fi dnl AC_CACHE_CHECK([for link], glibcxx_cv_link, [dnl @@ -4749,6 +4731,18 @@ dnl if test $glibcxx_cv_link = yes; then AC_DEFINE(HAVE_LINK, 1, [Define if link is available in .]) fi +dnl + AC_CACHE_CHECK([for lseek], +glibcxx_cv_lseek, [dnl +GCC_TRY_COMPILE_OR_LINK( + [#include ], + [lseek(1, 0, SEEK_SET);], + [glibcxx_cv_lseek=yes], + [glibcxx_cv_lseek=no]) + ]) + if test $glibcxx_cv_lseek = yes; then +AC_DEFINE(HAVE_LSEEK, 1, [Define if lseek is available in .]) + fi dnl AC_CACHE_CHECK([for readlink], glibcxx_cv_readlink, [dnl @@ -4785,6 +4779,25 @@ dnl if test $glibcxx_cv_truncate = yes; then AC_DEFINE(HAVE_TRUNCATE, 1, [Define if truncate is available in .]) fi +dnl + AC_CACHE_CHECK([for 
sendfile that can copy files], +glibcxx_cv_sendfile, [dnl +case "${target_os}" in + gnu* | linux* | solaris* | uclinux*) + GCC_TRY_COMPILE_OR_LINK( + [#include ], + [sendfile(1, 2, (off_t*)0, sizeof 1);], + [glibcxx_cv_sendfile=yes], + [glibcxx_cv_sendfile=no]) + ;; + *) + glibcxx_cv_sendfile=no + ;; +esac + ]) + if test $glibcxx_cv_sendfile = yes && test $glibcxx_cv_lseek = yes; then +AC_DEFINE(_GLIBCXX_USE_SENDFILE, 1, [Define if sendfile is available in .]) + fi dnl AC_CACHE_CHECK([for fdopendir], glibcxx_cv_fdopendir, [dnl diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h index abbfca43e5c..9e1b1d41dc5 100644 --- a/libstdc++-v3/src/filesystem/ops-common.h +++ b/libstdc++-v3/src/filesystem/ops-common.h @@ -51,6 +51,7 @@ # include # ifdef _GLIBCXX_USE_SENDFILE # include // sendfile +# include // lseek # endif #endif @@ -358,6 +359,32 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM } #ifdef NEED_DO_COPY_FILE +#if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS + bool + copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept + { +// a zero-length file is either empty, or not copyable by this syscall +// return early to avoid the syscall cost +if (length == 0) + { +errno
[PATCH] libstdc++: use copy_file_range, improve sendfile in filesystem::copy_file
The current copy_file implementation is suboptimal. It only uses sendfile for files smaller than 2GB, falling back to a userspace copy for anything larger, and does not support copy_file_range at all. copy_file_range is of increasing importance, particularly with the adoption of reflinks in filesystems. I am pretty sure I got some of the formatting wrong; feel free to tear it apart. I don't know if sendfile has identical semantics on Linux as it does on Solaris; if someone could test with a big file, that'd be great. Otherwise, this should not regress. The implementation will fall back to sendfile / userspace copy if copy_file_range is not available for the target paths. The copy implementations for sendfile and copy_file_range were put into separate functions, and the calling code was simplified to the point where you can basically just copy-paste it to add a new implementation, should new interesting syscalls pop up. Best Jannik From 306f9d5e1076ff936ef35942bca546ce188fba81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= Date: Mon, 6 Mar 2023 20:52:08 +0100 Subject: [PATCH 1/2] libstdc++: also use sendfile for big files We were previously only using sendfile for files smaller than 2GB, as sendfile needs to be called repeatedly for files bigger than that. Some quick numbers, copying a 16GB file, average of 10 repetitions: old: real: 13.4s user: 0.14s sys : 7.43s new: real: 8.90s user: 0.00s sys : 3.68s libstdc++-v3/ChangeLog: * src/filesystem/ops-common.h: enable sendfile for files >2GB in std::filesystem::copy_file --- libstdc++-v3/src/filesystem/ops-common.h | 77 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h index abbfca43e5c..d8afc6a4d64 100644 --- a/libstdc++-v3/src/filesystem/ops-common.h +++ b/libstdc++-v3/src/filesystem/ops-common.h @@ -358,6 +358,24 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM } #ifdef NEED_DO_COPY_FILE +#if defined _GLIBCXX_USE_SENDFILE && ! 
defined _GLIBCXX_FILESYSTEM_IS_WINDOWS + bool + copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept + { +size_t bytes_left = length; +off_t offset = 0; +ssize_t bytes_copied; +do { + bytes_copied = ::sendfile(fd_out, fd_in, &offset, bytes_left); + if (bytes_copied < 0) +{ + return false; +} + bytes_left -= bytes_copied; +} while (bytes_left > 0 && bytes_copied > 0); +return true; + } +#endif bool do_copy_file(const char_type* from, const char_type* to, std::filesystem::copy_options_existing_file options, @@ -498,28 +516,30 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM return false; } -size_t count = from_st->st_size; +bool has_copied = false; + #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS -off_t offset = 0; -ssize_t n = ::sendfile(out.fd, in.fd, &offset, count); -if (n < 0 && errno != ENOSYS && errno != EINVAL) +if (!has_copied) + has_copied = copy_file_sendfile(in.fd, out.fd, from_st->st_size); +if (!has_copied) { - ec.assign(errno, std::generic_category()); - return false; + if (errno != ENOSYS && errno != EINVAL) +{ + ec.assign(errno, std::generic_category()); + return false; +} } -if ((size_t)n == count) +#endif + +if (has_copied) { - if (!out.close() || !in.close()) - { - ec.assign(errno, std::generic_category()); - return false; - } - ec.clear(); - return true; +if (!out.close() || !in.close()) + { + ec.assign(errno, std::generic_category()); + return false; + } +return true; } -else if (n > 0) - count -= n; -#endif // _GLIBCXX_USE_SENDFILE using std::ios; __gnu_cxx::stdio_filebuf<char> sbin(in.fd, ios::in|ios::binary); @@ -530,29 +550,12 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM if (sbout.is_open()) out.fd = -1; -#ifdef _GLIBCXX_USE_SENDFILE -if (n != 0) +if (!(std::ostream(&sbout) << &sbin)) { - if (n < 0) - n = 0; - - const auto p1 = sbin.pubseekoff(n, ios::beg, ios::in); - const auto p2 = sbout.pubseekoff(n, ios::beg, ios::out); - - const std::streampos errpos(std::streamoff(-1)); - if (p1 == errpos || p2 == errpos) - { - ec = 
std::make_error_code(std::errc::io_error); - return false; - } + ec = std::make_error_code(std::errc::io_error); + return false; } -#endif -if (count && !(std::ostream(&sbout) << &sbin)) - { - ec = std::make_error_code(std::errc::io_error); - return false; - } if (!sbout.close() || !sbin.close()) { ec.assign(errno, std::generic_category()); -- 2.39.2 From 72b7ad044246e496d90b5f241f59bd0b69e214fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jannik=20Gl=C3=BCckert?= Date: Mon, 6 Mar 2023 23:11:41 +0100 Subject: [PATCH 2/2] libstdc++: use copy_file_range