Pádraig Brady <[email protected]> writes: > Cool. The read/write fallback should work. > I see this is now diagnosed: > > $ strace -e inject=splice:error=EIO:when=3 \ src/cat /dev/zero > t.c > > However errors on the writing splice are not always diagnosed: > > $ strace -e inject=splice:error=EIO:when=4 \ > src/cat /dev/zero > t.c > > I think that just needs a clause like you have already done on the read > side.
Oops, right. Done in the attached patch. I also simplified things to put the error call in a single place after the "done" label. I find it easier to follow that way. > As for tests, the odd and even straces above seem useful. I also added those. > Another issue I noticed is that in yes.c we avoid the splice > if increase_pipe_size() returns 0. cat.c should do the same. The increase_pipe_size() function will never return 0. It returns a reasonable guess if fcntl fails. I think that you were probably thinking of pipe_splice_size() in src/yes.c. That function returns 0 if the pipe size is smaller than the buffer. Collin
>From bc7eca1f350f327d6dd92768979431a990abed54 Mon Sep 17 00:00:00 2001 Message-ID: <bc7eca1f350f327d6dd92768979431a990abed54.1775121177.git.collin.fu...@gmail.com> From: Collin Funk <[email protected]> Date: Sun, 29 Mar 2026 16:13:01 -0700 Subject: [PATCH v3] cat: use splice if operating on pipes or if copy_file_range fails On a AMD Ryzen 7 3700X system: $ timeout 10 taskset 1 ./src/cat-prev /dev/zero \ | taskset 2 pv -r > /dev/null [1.84GiB/s] $ timeout 10 taskset 1 ./src/cat /dev/zero \ | taskset 2 pv -r > /dev/null [7.92GiB/s] On a Power10 system: $ taskset 1 ./src/yes | timeout 10 taskset 2 ./src/cat-prev \ | taskset 3 pv -r > /dev/null [12.6GiB/s] $ taskset 1 ./src/yes | timeout 10 taskset 2 ./src/cat \ | taskset 3 pv -r > /dev/null [61.3GiB/s] * NEWS: Mention the improvement. * src/cat.c: Include isapipe.h, splice.h, and unistd--.h. (splice_cat): New function. (main): Use it. * src/local.mk (noinst_HEADERS): Add src/splice.h. * src/splice.h: New file, based on definitions from src/yes.c. * src/yes.c: Include splice.h. (pipe_splice_size): Use increase_pipe_size from src/splice.h. (SPLICE_PIPE_SIZE): Remove definition, moved to src/splice.h. * tests/cat/splice.sh: New file, based on some tests in tests/misc/yes.sh. * tests/local.mk (all_tests): Add the new test. --- NEWS | 4 ++ src/cat.c | 121 ++++++++++++++++++++++++++++++++++++++++++-- src/local.mk | 1 + src/splice.h | 41 +++++++++++++++ src/yes.c | 15 +----- tests/cat/splice.sh | 64 +++++++++++++++++++++++ tests/local.mk | 1 + 7 files changed, 231 insertions(+), 16 deletions(-) create mode 100644 src/splice.h create mode 100755 tests/cat/splice.sh diff --git a/NEWS b/NEWS index 2fabd07b7..e5bd445e4 100644 --- a/NEWS +++ b/NEWS @@ -39,6 +39,10 @@ GNU coreutils NEWS -*- outline -*- ** Improvements + 'cat' now uses zero-copy I/O on Linux when the input or output are pipes to + significantly increase throughput. + E.g., throughput improved 5x from 12.6GiB/s to 61.3GiB/s on a Power10 system. 
+ 'df --local' recognises more file system types as remote. Specifically: autofs, ncpfs, smb, smb2, gfs, gfs2, userlandfs. diff --git a/src/cat.c b/src/cat.c index f9c92005c..12a968635 100644 --- a/src/cat.c +++ b/src/cat.c @@ -37,6 +37,9 @@ #include "ioblksize.h" #include "fadvise.h" #include "full-write.h" +#include "isapipe.h" +#include "splice.h" +#include "unistd--.h" #include "xbinary-io.h" /* The official name of this program (e.g., no 'g' prefix). */ @@ -545,6 +548,109 @@ copy_cat (void) } } +/* Copy data from input to output using splice if possible. + Return 1 if successful, 0 if ordinary read+write should be tried, + -1 if a serious problem has been diagnosed. */ + +static int +splice_cat (void) +{ + bool some_copied = false; + bool ok = true; + +#if HAVE_SPLICE + + static int stdout_is_pipe = -1; + if (stdout_is_pipe == -1) + stdout_is_pipe = 0 < isapipe (STDOUT_FILENO); + + bool input_is_pipe = 0 < isapipe (input_desc); + + idx_t pipe_size = 0; + if (input_is_pipe) + pipe_size = increase_pipe_size (input_desc); + if (stdout_is_pipe) + pipe_size = MAX (pipe_size, increase_pipe_size (STDOUT_FILENO)); + + int pipefd[2] = { -1, -1 }; + int outfd; + + /* Avoid creating an intermediate pipe if possible. */ + if (input_is_pipe || stdout_is_pipe) + outfd = STDOUT_FILENO; + else + { + if (pipe (pipefd) < 0) + return false; + outfd = pipefd[1]; + pipe_size = increase_pipe_size (pipefd[0]); + } + + while (true) + { + ssize_t bytes_read = splice (input_desc, NULL, outfd, NULL, + pipe_size, 0); + /* If we successfully splice'd input previously, assume that any + subsequent error is fatal. If not, then fall back to read + and write. */ + ok = 0 <= bytes_read || ! some_copied; + if (bytes_read <= 0) + goto done; + if (outfd == STDOUT_FILENO) + some_copied = true; + else + { + /* We need to drain the intermediate pipe to standard output. 
+ */ + while (0 < bytes_read) + { + ssize_t bytes_written = splice (pipefd[0], NULL, STDOUT_FILENO, + NULL, pipe_size, 0); + /* If we successfully splice'd output, assume any subsequent + error is fatal. If not, then drain the intermediate pipe and + continue using read and write. */ + if (bytes_written < 0) + { + if (some_copied) + ok = false; + else + { + char buf[BUFSIZ]; + while (0 < bytes_read) + { + ssize_t count = MIN (bytes_read, sizeof buf); + ssize_t n_read = read (pipefd[0], buf, count); + ok = 0 <= n_read; + if (n_read <= 0) + goto done; + if (full_write (STDOUT_FILENO, buf, n_read) + != n_read) + write_error (); + bytes_read -= n_read; + } + } + } + if (bytes_written <= 0) + goto done; + some_copied = true; + bytes_read -= bytes_written; + } + } + } + + done: + if (! ok) + error (0, errno, "%s", quotef (infile)); + if (0 <= pipefd[0]) + { + int saved_errno = errno; + close (pipefd[0]); + close (pipefd[1]); + errno = saved_errno; + } +#endif + + return ok ? some_copied : -1; +} int main (int argc, char **argv) @@ -760,9 +866,18 @@ main (int argc, char **argv) } else { - insize = MAX (insize, outsize); - inbuf = xalignalloc (page_size, insize); - ok &= simple_cat (inbuf, insize); + int splice_cat_status = splice_cat (); + if (splice_cat_status != 0) + { + inbuf = NULL; + ok &= 0 < splice_cat_status; + } + else + { + insize = MAX (insize, outsize); + inbuf = xalignalloc (page_size, insize); + ok &= simple_cat (inbuf, insize); + } } } else diff --git a/src/local.mk b/src/local.mk index bf88f7d0e..9d9c9814b 100644 --- a/src/local.mk +++ b/src/local.mk @@ -61,6 +61,7 @@ noinst_HEADERS = \ src/remove.h \ src/set-fields.h \ src/show-date.h \ + src/splice.h \ src/statx.h \ src/system.h \ src/temp-stream.h \ diff --git a/src/splice.h b/src/splice.h new file mode 100644 index 000000000..1fb55054d --- /dev/null +++ b/src/splice.h @@ -0,0 +1,41 @@ +/* Common definitions for splice and vmsplice. + Copyright (C) 2026 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +#ifndef SPLICE_H +# define SPLICE_H 1 + +# if HAVE_SPLICE + +/* Empirically determined pipe size for best throughput. + Needs to be <= /proc/sys/fs/pipe-max-size */ +enum { SPLICE_PIPE_SIZE = 512 * 1024 }; + +static inline idx_t +increase_pipe_size (int fd) +{ + int pipe_cap = 0; +# if defined F_SETPIPE_SZ && defined F_GETPIPE_SZ + if ((pipe_cap = fcntl (fd, F_SETPIPE_SZ, SPLICE_PIPE_SIZE)) < 0) + pipe_cap = fcntl (fd, F_GETPIPE_SZ); +# endif + if (pipe_cap <= 0) + pipe_cap = 64 * 1024; + return pipe_cap; +} + +# endif + +#endif diff --git a/src/yes.c b/src/yes.c index 1a1d74ce5..d111b125e 100644 --- a/src/yes.c +++ b/src/yes.c @@ -27,6 +27,7 @@ #include "full-write.h" #include "isapipe.h" #include "long-options.h" +#include "splice.h" #include "unistd--.h" /* The official name of this program (e.g., no 'g' prefix). */ @@ -76,10 +77,6 @@ repeat_pattern (char *dest, char const *src, idx_t srcsize, idx_t bufsize) #if HAVE_SPLICE -/* Empirically determined pipe size for best throughput. - Needs to be <= /proc/sys/fs/pipe-max-size */ -enum { SPLICE_PIPE_SIZE = 512 * 1024 }; - /* Enlarge a pipe towards SPLICE_PIPE_SIZE and return the actual capacity as a quarter of the pipe size (the empirical sweet spot for vmsplice throughput), rounded down to a multiple of COPYSIZE. 
@@ -88,15 +85,7 @@ enum { SPLICE_PIPE_SIZE = 512 * 1024 }; static idx_t pipe_splice_size (int fd, idx_t copysize) { - int pipe_cap = 0; -# if defined F_SETPIPE_SZ && defined F_GETPIPE_SZ - if ((pipe_cap = fcntl (fd, F_SETPIPE_SZ, SPLICE_PIPE_SIZE)) < 0) - pipe_cap = fcntl (fd, F_GETPIPE_SZ); -# endif - if (pipe_cap <= 0) - pipe_cap = 64 * 1024; - - size_t buf_cap = pipe_cap / 4; + size_t buf_cap = increase_pipe_size (fd) / 4; return buf_cap / copysize * copysize; } diff --git a/tests/cat/splice.sh b/tests/cat/splice.sh new file mode 100755 index 000000000..d47481e16 --- /dev/null +++ b/tests/cat/splice.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# Test some cases where 'cat' uses the splice system call. + +# Copyright (C) 2026 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ cat +getlimits_ +uses_strace_ + +# Check the non pipe output case, since that is different with splice +if timeout 10 true; then + timeout .1 cat /dev/zero >/dev/null + test $? = 124 || fail=1 +fi + +# Test that splice errors are diagnosed. 
+cat <<EOF > exp || framework_failure_ +cat: /dev/zero: $EIO +EOF +for when in 3 4; do + returns_ 1 timeout 10 strace -o /dev/null \ + -e inject=splice:error=EIO:when=$when \ + cat /dev/zero >/dev/null 2>err || fail=1 + compare exp err || fail=1 +done + +# Ensure we fallback to write() if there is an issue with (async) zero-copy +zc_syscalls='io_uring_setup io_uring_enter io_uring_register memfd_create + sendfile splice tee vmsplice' +syscalls=$( + for s in $zc_syscalls; do + strace -qe "$s" true >/dev/null 2>&1 && echo "$s" + done | paste -s -d,) + +no_zero_copy() { + strace -f -o /dev/null -e inject=${syscalls}:error=ENOSYS "$@" +} +if no_zero_copy true; then + test "$(no_zero_copy cat /dev/zero | head -c 2 | tr '\0' 'y')" = 'yy' \ + || fail=1 +fi +# Ensure we fallback to write() if there is an issue with pipe2() +# For example if we don't have enough file descriptors available. +no_pipe() { strace -f -o /dev/null -e inject=pipe,pipe2:error=EMFILE "$@"; } +if no_pipe true; then + no_pipe timeout .1 cat /dev/zero >/dev/null + test $? = 124 || fail=1 +fi + +Exit $fail diff --git a/tests/local.mk b/tests/local.mk index 590978297..2e889e207 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -308,6 +308,7 @@ all_tests = \ tests/cat/cat-proc.sh \ tests/cat/cat-buf.sh \ tests/cat/cat-self.sh \ + tests/cat/splice.sh \ tests/misc/basename.pl \ tests/basenc/base64.pl \ tests/basenc/basenc.pl \ -- 2.53.0
