An oversight in my multi-byte fold changes was that I did not consider
characters with a width of zero. Here is an easy way to see the issue:

    $ ./src/fold /dev/zero 
    Segmentation fault (core dumped)

The first patch fixes that by printing the output buffer if the current
character cannot fit in it and the current column is less than the fold
width (i.e. the unlikely case of many zero width characters). It also
adds a test using /dev/zero and U+200B ZERO WIDTH SPACE.

The use of 'fold /dev/zero' is really a more specific case of fold
operating on very long lines. In all versions before the multi-byte
changes, this could exhaust the system's memory, but only if --width
is large:

    $ timeout 60 valgrind ~/fold-9.7 --width=$((1 << 62)) /dev/zero
    ==2105553==     in use at exit: 2,122,618,116 bytes in 51 blocks

I added a note about that in NEWS since it is a nice change, despite it
being highly unlikely anyone does this.

Patch 3 corrects the wording to state this only happens if a large
--width is given.

Collin

>From 98e5418a38fa8e944d5b694ba2aa0c165092e63b Mon Sep 17 00:00:00 2001
Message-ID: <98e5418a38fa8e944d5b694ba2aa0c165092e63b.1757998224.git.collin.fu...@gmail.com>
From: Collin Funk <[email protected]>
Date: Mon, 15 Sep 2025 20:53:23 -0700
Subject: [PATCH 1/3] fold: fix out of bounds write with zero width characters

* src/fold.c (fold_file): Prefer putchar ('\n') to copying characters.
If there is no room left in the output buffer, write it out without a
newline, since it does not yet hold a full line of text.
* tests/fold/fold-zero-width.sh: New test case.
* tests/local.mk (all_tests): Add it.
---
 src/fold.c                    | 15 +++++++---
 tests/fold/fold-zero-width.sh | 55 +++++++++++++++++++++++++++++++++++
 tests/local.mk                |  1 +
 3 files changed, 67 insertions(+), 4 deletions(-)
 create mode 100755 tests/fold/fold-zero-width.sh

diff --git a/src/fold.c b/src/fold.c
index b90bc7d80..190865963 100644
--- a/src/fold.c
+++ b/src/fold.c
@@ -192,9 +192,8 @@ fold_file (char const *filename, size_t width)
             }
           if (g.ch == '\n')
             {
-              memcpy (line_out + offset_out, p, g.len);
-              offset_out += g.len;
               fwrite (line_out, sizeof (char), offset_out, stdout);
+              putchar ('\n');
               column = offset_out = 0;
               continue;
             }
@@ -249,17 +248,25 @@ fold_file (char const *filename, size_t width)
 
               if (offset_out == 0)
                 {
-                  memcpy (line_out + offset_out, p, g.len);
+                  memcpy (line_out, p, g.len);
                   offset_out += g.len;
                   continue;
                 }
 
-              line_out[offset_out++] = '\n';
               fwrite (line_out, sizeof (char), offset_out, stdout);
+              putchar ('\n');
               column = offset_out = 0;
               goto rescan;
             }
 
+          /* This can occur if we have read characters with a width of
+             zero.  */
+          if (sizeof line_out <= offset_out + g.len)
+            {
+              fwrite (line_out, sizeof (char), offset_out, stdout);
+              offset_out = 0;
+            }
+
           memcpy (line_out + offset_out, p, g.len);
           offset_out += g.len;
         }
diff --git a/tests/fold/fold-zero-width.sh b/tests/fold/fold-zero-width.sh
new file mode 100755
index 000000000..a0d7e3fe6
--- /dev/null
+++ b/tests/fold/fold-zero-width.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# Test fold with zero width characters.
+
+# Copyright (C) 2025 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ fold printf
+getlimits_
+
+# Make sure we do not overflow the buffer.
+IO_BUFSIZE_TIMES2=$(($IO_BUFSIZE * 2))
+
+# Fold counts by columns by default.
+head -c $IO_BUFSIZE_TIMES2 /dev/zero | fold > out || fail=1
+test $(cat out | wc -l) -eq 0 || fail=1
+
+# Check that zero width characters are counted with --characters.
+head -c $IO_BUFSIZE_TIMES2 /dev/zero | fold --characters > out || fail=1
+test $(cat out | wc -l) -eq $(($IO_BUFSIZE_TIMES2 / 80)) || fail=1
+
+test "$LOCALE_FR_UTF8" != none || skip_ "French UTF-8 locale not available"
+
+LC_ALL=$LOCALE_FR_UTF8
+export LC_ALL
+
+# Same thing, but using U+200B ZERO WIDTH SPACE.
+yes $(env printf '\u200B') | head -n $IO_BUFSIZE_TIMES2 | tr -d '\n' > inp
+
+fold inp > out || fail=1
+test $(cat out | wc -l) -eq 0 || fail=1
+
+fold --characters inp > out || fail=1
+test $(cat out | wc -l) -eq $(($IO_BUFSIZE_TIMES2 / 80)) || fail=1
+
+# Ensure bounded memory operation.
+vm=$(get_min_ulimit_v_ fold /dev/null) && {
+  head -c $IO_BUFSIZE_TIMES2 /dev/zero | tr -d '\n' \
+    | (ulimit -v $(($vm+8000)) && fold 2>err) | head || fail=1
+  compare /dev/null err || fail=1
+}
+
+Exit $fail
diff --git a/tests/local.mk b/tests/local.mk
index 67a919e84..4aa199a19 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -330,6 +330,7 @@ all_tests =					\
   tests/fold/fold-characters.sh			\
   tests/fold/fold-nbsp.sh			\
   tests/fold/fold-spaces.sh			\
+  tests/fold/fold-zero-width.sh			\
   tests/fold/fold.pl				\
   tests/groups/groups-dash.sh			\
   tests/groups/groups-process-all.sh		\
-- 
2.51.0

>From 58b5fd0bc8f0c398c1dbb4f90d2940dab30c9115 Mon Sep 17 00:00:00 2001
Message-ID: <58b5fd0bc8f0c398c1dbb4f90d2940dab30c9115.1757998224.git.collin.fu...@gmail.com>
In-Reply-To: <98e5418a38fa8e944d5b694ba2aa0c165092e63b.1757998224.git.collin.fu...@gmail.com>
References: <98e5418a38fa8e944d5b694ba2aa0c165092e63b.1757998224.git.collin.fu...@gmail.com>
From: Collin Funk <[email protected]>
Date: Mon, 15 Sep 2025 21:09:42 -0700
Subject: [PATCH 2/3] doc: NEWS: mention fold can operate on very long lines

* NEWS: Before commit fb9016d50 (fold: use fread instead of getline,
2025-08-24), fold required that the maximum line size in a file fit into
memory. Document that this is no longer the case.
---
 NEWS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/NEWS b/NEWS
index 392129dad..ef423425b 100644
--- a/NEWS
+++ b/NEWS
@@ -38,6 +38,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   precedence.  Previously multiple specifications would induce an error.
   [bug introduced in coreutils-5.90]
 
+  'fold' no longer exhausts memory when operating on very long lines.
+  [This bug was present in "the beginning".]
+
   'install -d' now produces the correct diagnostic upon failure
   to create a directory.  Previously it would have produced
   a confusing error about changing permissions.
-- 
2.51.0

>From e33bfb5886f3e4c0e45b833289a210d673947e49 Mon Sep 17 00:00:00 2001
Message-ID: <e33bfb5886f3e4c0e45b833289a210d673947e49.1757998224.git.collin.fu...@gmail.com>
In-Reply-To: <98e5418a38fa8e944d5b694ba2aa0c165092e63b.1757998224.git.collin.fu...@gmail.com>
References: <98e5418a38fa8e944d5b694ba2aa0c165092e63b.1757998224.git.collin.fu...@gmail.com>
From: Collin Funk <[email protected]>
Date: Mon, 15 Sep 2025 21:40:56 -0700
Subject: [PATCH 3/3] doc: NEWS: correct the previous commit

* NEWS: The memory allocated by 'fold' in the past was also bounded by
--width.
---
 NEWS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index ef423425b..84692156c 100644
--- a/NEWS
+++ b/NEWS
@@ -38,7 +38,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   precedence.  Previously multiple specifications would induce an error.
   [bug introduced in coreutils-5.90]
 
-  'fold' no longer exhausts memory when operating on very long lines.
+  'fold' no longer exhausts memory when operating on very long lines and
+  a very large --width argument.
   [This bug was present in "the beginning".]
 
   'install -d' now produces the correct diagnostic upon failure
-- 
2.51.0

Reply via email to