On 2024-04-06 03:09, Pádraig Brady wrote:
I'll apply this.

Heh, I beat you to it by looking for similar errors elsewhere and applying the attached patches to fix the issues I found. None of them look like serious bugs.

BTW we should improve sort buffer handling in general

Oh yes.

PS. My current little task is to get i18n to work better with 'sort'. Among other things I want Unicode-style full case folding.
From 225cb8d7473eadb481a4884e929bf23589d4bd82 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 6 Apr 2024 15:13:23 -0700
Subject: [PATCH 1/4] =?UTF-8?q?cat:=20don=E2=80=99t=20trust=20st=5Fsize=20?=
 =?UTF-8?q?on=20/proc=20files?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/cat.c (main):
Improve test for when copying will exhaust the output device.
Do not rely on st_size, which is unreliable in /proc.
Use lseek instead; this is good enough here.
* tests/cat/cat-self.sh: Test the relaxation of the heuristic
for self-copying.
---
 src/cat.c             | 31 +++++++++++++++++++++----------
 tests/cat/cat-self.sh | 20 ++++++++++++++++++++
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/cat.c b/src/cat.c
index 4ed404363..b33faeb35 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -645,9 +645,10 @@ main (int argc, char **argv)
   /* Optimal size of i/o operations of output.  */
   idx_t outsize = io_blksize (&stat_buf);
 
-  /* Device and I-node number of the output.  */
+  /* Device, I-node number and lazily-acquired flags of the output.  */
   dev_t out_dev = stat_buf.st_dev;
   ino_t out_ino = stat_buf.st_ino;
+  int out_flags = -2;
 
   /* True if the output is a regular file.  */
   bool out_isreg = S_ISREG (stat_buf.st_mode) != 0;
@@ -701,17 +702,27 @@ main (int argc, char **argv)
 
       fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
 
-      /* Don't copy a nonempty regular file to itself, as that would
-         merely exhaust the output device.  It's better to catch this
-         error earlier rather than later.  */
+      /* Don't copy a file to itself if that would merely exhaust the
+         output device.  It's better to catch this error earlier
+         rather than later.  */
 
-      if (out_isreg
-          && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino
-          && lseek (input_desc, 0, SEEK_CUR) < stat_buf.st_size)
+      if (stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino)
         {
-          error (0, 0, _("%s: input file is output file"), quotef (infile));
-          ok = false;
-          goto contin;
+          if (out_flags < -1)
+            out_flags = fcntl (STDOUT_FILENO, F_GETFL);
+          bool exhausting = 0 <= out_flags && out_flags & O_APPEND;
+          if (!exhausting)
+            {
+              off_t in_pos = lseek (input_desc, 0, SEEK_CUR);
+              if (0 <= in_pos)
+                exhausting = in_pos < lseek (STDOUT_FILENO, 0, SEEK_CUR);
+            }
+          if (exhausting)
+            {
+              error (0, 0, _("%s: input file is output file"), quotef (infile));
+              ok = false;
+              goto contin;
+            }
         }
 
       /* Pointer to the input buffer.  */
diff --git a/tests/cat/cat-self.sh b/tests/cat/cat-self.sh
index e0f6455c0..854825def 100755
--- a/tests/cat/cat-self.sh
+++ b/tests/cat/cat-self.sh
@@ -30,4 +30,24 @@ echo y >doc.end || framework_failure_
 cat doc doc.end >doc || fail=1
 compare doc doc.end || fail=1
 
+# This terminates even though it copies a file to itself.
+# Coreutils 9.5 and earlier rejected this.
+echo x >fx || framework_failure_
+echo y >fy || framework_failure_
+cat fx fy >fxy || fail=1
+for i in 1 2; do
+  cat fx >fxy$i || fail=1
+done
+for i in 3 4 5 6; do
+  cat fx >fx$i || fail=1
+done
+cat - fy <fxy1 1<>fxy1 || fail=1
+compare fxy fxy1 || fail=1
+cat fxy2 fy 1<>fxy2 || fail=1
+compare fxy fxy2 || fail=1
+returns_ 1 cat fx fx3 1<>fx3 || fail=1
+returns_ 1 cat - fx4 <fx 1<>fx4 || fail=1
+returns_ 1 cat fx5 >>fx5 || fail=1
+returns_ 1 cat <fx6 >>fx6 || fail=1
+
 Exit $fail
-- 
2.40.1

From ac6b8d8224de140f5a6f2ca66e6ce279604a37e6 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 6 Apr 2024 15:15:04 -0700
Subject: [PATCH 2/4] =?UTF-8?q?dd:=20don=E2=80=99t=20trust=20st=5Fsize=20o?=
 =?UTF-8?q?n=20/proc/files?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/dd.c (skip): Don’t trust st_size == 0.
---
 src/dd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dd.c b/src/dd.c
index 370a42c97..6e0d725da 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -1809,7 +1809,7 @@ skip (int fdesc, char const *file, intmax_t records, idx_t blocksize,
            struct stat st;
            if (ifstat (STDIN_FILENO, &st) != 0)
              error (EXIT_FAILURE, errno, _("cannot fstat %s"), quoteaf (file));
-           if (usable_st_size (&st) && 0 <= input_offset
+           if (usable_st_size (&st) && 0 < st.st_size && 0 <= input_offset
                && st.st_size - input_offset < offset)
              {
                /* When skipping past EOF, return the number of _full_ blocks
-- 
2.40.1

From 8ff3903281e03d36dd1aa2a202a56f38af726d91 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 6 Apr 2024 15:17:14 -0700
Subject: [PATCH 3/4] =?UTF-8?q?sort:=20don=E2=80=99t=20trust=20st=5Fsize?=
 =?UTF-8?q?=20on=20/proc=20files?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem and fix reported by Takashi Kusumi in:
https://bugs.gnu.org/70231
* src/sort.c (sort_buffer_size): Don’t trust st_size == 0.
---
 src/sort.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sort.c b/src/sort.c
index 2d8324ca4..78983ff27 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1539,7 +1539,7 @@ sort_buffer_size (FILE *const *fps, size_t nfps,
           != 0)
         sort_die (_("stat failed"), files[i]);
 
-      if (S_ISREG (st.st_mode))
+      if (usable_st_size (&st) && 0 < st.st_size)
         file_size = st.st_size;
       else
         {
-- 
2.40.1

From 11163675818ab877f20d3740a7c3e59d565b8e9c Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 6 Apr 2024 15:18:04 -0700
Subject: [PATCH 4/4] =?UTF-8?q?split:=20don=E2=80=99t=20trust=20st=5Fsize?=
 =?UTF-8?q?=20on=20/proc=20files?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/split.c (create): Don’t trust st_size == 0.
---
 src/split.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/split.c b/src/split.c
index 037960a59..f82a7f74b 100644
--- a/src/split.c
+++ b/src/split.c
@@ -489,10 +489,8 @@ create (char const *name)
       if (psame_inode (&in_stat_buf, &out_stat_buf))
         error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
                quoteaf (name));
-      bool regularish
-        = S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf);
-      if (! (regularish && out_stat_buf.st_size == 0)
-          && ftruncate (fd, 0) < 0 && regularish)
+      if (ftruncate (fd, 0) < 0
+          && (S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf)))
         error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
 
       return fd;
-- 
2.40.1

Reply via email to