These two patches work around an ISO C compliance bug of wcscmp()
on several platforms, and another one specific to AIX.

The point is that in ISO C, a "wide character" is any wchar_t value.
A "wide character" is not constrained to the range 0..INT_MAX. For
the precise reasoning, see
<https://www.openwall.com/lists/musl/2023/04/18/5>.

In particular, the module 'wcscmp' is no longer obsolete.


2023-04-18  Bruno Haible  <[email protected]>

        wcscmp: Add tests.
        * tests/test-wcscmp.c: New file, based on tests/unistr/test-strcmp.h.
        * modules/wcscmp-tests: New file.

        wcscmp: Work around two ISO C compliance bugs on several platforms.
        * lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP.
        * lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters
        are in the range 0..INT_MAX.
        * m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide
        characters. Set REPLACE_WCSCMP.
        * m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP.
        * modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP.
        * modules/wcscmp (Status, Notice): Un-obsolete this module.
        (configure.ac): Consider REPLACE_WCSCMP.
        * doc/posix-functions/wcscmp.texi: Mention the two bugs.

From 4b440d3568b01dd9acd5242bea8b63fc43428f5a Mon Sep 17 00:00:00 2001
From: Bruno Haible <[email protected]>
Date: Wed, 19 Apr 2023 02:14:09 +0200
Subject: [PATCH 1/2] wcscmp: Work around two ISO C compliance bugs on several
 platforms.

* lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP.
* lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters
are in the range 0..INT_MAX.
* m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide
characters. Set REPLACE_WCSCMP.
* m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP.
* modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP.
* modules/wcscmp (Status, Notice): Un-obsolete this module.
(configure.ac): Consider REPLACE_WCSCMP.
* doc/posix-functions/wcscmp.texi: Mention the two bugs.
---
 ChangeLog                       | 14 ++++++++
 doc/posix-functions/wcscmp.texi |  8 +++++
 lib/wchar.in.h                  | 14 ++++++--
 lib/wcscmp-impl.h               |  5 +--
 m4/wchar_h.m4                   |  3 +-
 m4/wcscmp.m4                    | 58 ++++++++++++++++++++++++++++++++-
 modules/wchar                   |  1 +
 modules/wcscmp                  |  9 ++---
 8 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 17596c4b23..bc02f2d5f5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2023-04-18  Bruno Haible  <[email protected]>
+
+	wcscmp: Work around two ISO C compliance bugs on several platforms.
+	* lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP.
+	* lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters
+	are in the range 0..INT_MAX.
+	* m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide
+	characters. Set REPLACE_WCSCMP.
+	* m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP.
+	* modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP.
+	* modules/wcscmp (Status, Notice): Un-obsolete this module.
+	(configure.ac): Consider REPLACE_WCSCMP.
+	* doc/posix-functions/wcscmp.texi: Mention the two bugs.
+
 2023-04-18  Bruno Haible  <[email protected]>
 
 	wmemcmp: Add tests.
diff --git a/doc/posix-functions/wcscmp.texi b/doc/posix-functions/wcscmp.texi
index 4c4de8c6dc..bc64d28f56 100644
--- a/doc/posix-functions/wcscmp.texi
+++ b/doc/posix-functions/wcscmp.texi
@@ -8,6 +8,14 @@
 
 Portability problems fixed by Gnulib:
 @itemize
+@item
+This function compares the wide characters as if they were unsigned, although
+@code{wchar_t} is signed, on some platforms:
+glibc 2.14.1 on x86 or x86_64, musl libc 1.2.3, macOS 12.5, FreeBSD 13.2, NetBSD 9.0, OpenBSD 7.2, Solaris 11.4.
+@item
+This function may return a wrong result if the two arguments are of different
+length, on some platforms:
+AIX 7.2 in 64-bit mode.
 @end itemize
 
 Portability problems not fixed by Gnulib:
diff --git a/lib/wchar.in.h b/lib/wchar.in.h
index 6a5b18d39d..c347256368 100644
--- a/lib/wchar.in.h
+++ b/lib/wchar.in.h
@@ -938,11 +938,21 @@ _GL_WARN_ON_USE (wcsncat, "wcsncat is unportable - "
 
 /* Compare S1 and S2.  */
 #if @GNULIB_WCSCMP@
-# if !@HAVE_WCSCMP@
+# if @REPLACE_WCSCMP@
+#  if !(defined __cplusplus && defined GNULIB_NAMESPACE)
+#   undef wcscmp
+#   define wcscmp rpl_wcscmp
+#  endif
+_GL_FUNCDECL_RPL (wcscmp, int, (const wchar_t *s1, const wchar_t *s2)
+                               _GL_ATTRIBUTE_PURE);
+_GL_CXXALIAS_RPL (wcscmp, int, (const wchar_t *s1, const wchar_t *s2));
+# else
+#  if !@HAVE_WCSCMP@
 _GL_FUNCDECL_SYS (wcscmp, int, (const wchar_t *s1, const wchar_t *s2)
                                _GL_ATTRIBUTE_PURE);
-# endif
+#  endif
 _GL_CXXALIAS_SYS (wcscmp, int, (const wchar_t *s1, const wchar_t *s2));
+# endif
 # if __GLIBC__ >= 2
 _GL_CXXALIASWARN (wcscmp);
 # endif
diff --git a/lib/wcscmp-impl.h b/lib/wcscmp-impl.h
index ba96db4f77..bc17c75d94 100644
--- a/lib/wcscmp-impl.h
+++ b/lib/wcscmp-impl.h
@@ -24,8 +24,9 @@ wcscmp (const wchar_t *s1, const wchar_t *s2)
       wchar_t wc2 = *s2++;
       if (wc1 != (wchar_t)'\0' && wc1 == wc2)
         continue;
-      /* Note that wc1 and wc2 each have at most 31 bits.  */
-      return (int)wc1 - (int)wc2;
+      /* ISO C requires wcscmp to work with all wchar_t values.
+         We cannot assume that wc1 and wc2 are in the range 0..INT_MAX.  */
+      return _GL_CMP (wc1, wc2);
              /* > 0 if wc1 > wc2, < 0 if wc1 < wc2,
                 = 0 if wc1 and wc2 are both '\0'.  */
     }
diff --git a/m4/wchar_h.m4 b/m4/wchar_h.m4
index dfd154f8e9..b9fa7cec84 100644
--- a/m4/wchar_h.m4
+++ b/m4/wchar_h.m4
@@ -7,7 +7,7 @@
 
 dnl Written by Eric Blake.
 
-# wchar_h.m4 serial 58
+# wchar_h.m4 serial 59
 
 AC_DEFUN_ONCE([gl_WCHAR_H],
 [
@@ -253,6 +253,7 @@ AC_DEFUN([gl_WCHAR_H_DEFAULTS]
   REPLACE_WCWIDTH=0;    AC_SUBST([REPLACE_WCWIDTH])
   REPLACE_WCSWIDTH=0;   AC_SUBST([REPLACE_WCSWIDTH])
   REPLACE_WCSFTIME=0;   AC_SUBST([REPLACE_WCSFTIME])
+  REPLACE_WCSCMP=0;     AC_SUBST([REPLACE_WCSCMP])
   REPLACE_WCSSTR=0;     AC_SUBST([REPLACE_WCSSTR])
   REPLACE_WCSTOK=0;     AC_SUBST([REPLACE_WCSTOK])
   REPLACE_WMEMCMP=0;    AC_SUBST([REPLACE_WMEMCMP])
diff --git a/m4/wcscmp.m4 b/m4/wcscmp.m4
index 61fd800e4d..a3000ed275 100644
--- a/m4/wcscmp.m4
+++ b/m4/wcscmp.m4
@@ -1,4 +1,4 @@
-# wcscmp.m4 serial 2
+# wcscmp.m4 serial 3
 dnl Copyright (C) 2011-2023 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -10,5 +10,61 @@ AC_DEFUN([gl_FUNC_WCSCMP]
   AC_CHECK_FUNCS_ONCE([wcscmp])
   if test $ac_cv_func_wcscmp = no; then
     HAVE_WCSCMP=0
+  else
+    AC_CACHE_CHECK([whether wcscmp works for all wide characters],
+      [gl_cv_func_wcscmp_works],
+      [AC_RUN_IFELSE(
+         [AC_LANG_SOURCE([[
+            #include <wchar.h>
+            int main ()
+            {
+              int result = 0;
+              { /* This test fails on glibc < 2.15, musl libc 1.2.3, macOS 12.5,
+                   FreeBSD 13.2, NetBSD 9.0, OpenBSD 7.2, Solaris 11.4.  */
+                wchar_t a[2] = { (wchar_t) 0x76547654, 0 };
+                wchar_t b[2] = { (wchar_t) 0x9abc9abc, 0 };
+                int cmp = wcscmp (a, b);
+                if (!((wchar_t)-1 < 0 ? cmp > 0 : cmp < 0))
+                  result |= 1;
+              }
+              { /* This test fails on AIX in 64-bit mode.  */
+                wchar_t c[2] = { (wchar_t) 'x', 0 };
+                wchar_t d[3] = { (wchar_t) 'x', (wchar_t) 0x9abc9abc, 0 };
+                int cmp = wcscmp (c, d);
+                if (!((wchar_t)-1 < 0 ? cmp > 0 : cmp < 0))
+                  result |= 2;
+              }
+              return result;
+            }
+            ]])
+         ],
+         [gl_cv_func_wcscmp_works=yes],
+         [gl_cv_func_wcscmp_works=no],
+         [case "$host_os" in
+            # Guess no on glibc versions < 2.15.
+            *-gnu* | gnu*)
+              AC_EGREP_CPP([Unlucky],
+                [
+#include <features.h>
+#ifdef __GNU_LIBRARY__
+ #if (__GLIBC__ == 2 && __GLIBC_MINOR__ < 15)
+  Unlucky GNU user
+ #endif
+#endif
+                ],
+                [gl_cv_func_wcscmp_works="guessing no"],
+                [gl_cv_func_wcscmp_works="guessing yes"])
+              ;;
+            # Guess no on musl systems.
+            *-musl* | midipix*) gl_cv_func_wcscmp_works="guessing no" ;;
+            # If we don't know, obey --enable-cross-guesses.
+            *) gl_cv_func_wcscmp_works="$gl_cross_guess_normal" ;;
+          esac
+         ])
+      ])
+    case "$gl_cv_func_wcscmp_works" in
+      *yes) ;;
+      *) REPLACE_WCSCMP=1 ;;
+    esac
   fi
 ])
diff --git a/modules/wchar b/modules/wchar
index 180c94309c..88b442b525 100644
--- a/modules/wchar
+++ b/modules/wchar
@@ -142,6 +142,7 @@ wchar.h: wchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H)
 	      -e 's|@''REPLACE_WCWIDTH''@|$(REPLACE_WCWIDTH)|g' \
 	      -e 's|@''REPLACE_WCSWIDTH''@|$(REPLACE_WCSWIDTH)|g' \
 	      -e 's|@''REPLACE_WCSFTIME''@|$(REPLACE_WCSFTIME)|g' \
+	      -e 's|@''REPLACE_WCSCMP''@|$(REPLACE_WCSCMP)|g' \
 	      -e 's|@''REPLACE_WCSSTR''@|$(REPLACE_WCSSTR)|g' \
 	      -e 's|@''REPLACE_WCSTOK''@|$(REPLACE_WCSTOK)|g' \
 	      -e 's|@''REPLACE_WMEMCMP''@|$(REPLACE_WMEMCMP)|g' \
diff --git a/modules/wcscmp b/modules/wcscmp
index 1b4fde704a..6df70f0543 100644
--- a/modules/wcscmp
+++ b/modules/wcscmp
@@ -1,12 +1,6 @@
 Description:
 wcscmp() function: compare two wide strings.
 
-Status:
-obsolete
-
-Notice:
-This module is obsolete.
-
 Files:
 lib/wcscmp.c
 lib/wcscmp-impl.h
@@ -17,7 +11,8 @@ wchar
 
 configure.ac:
 gl_FUNC_WCSCMP
-gl_CONDITIONAL([GL_COND_OBJ_WCSCMP], [test $HAVE_WCSCMP = 0])
+gl_CONDITIONAL([GL_COND_OBJ_WCSCMP],
+               [test $HAVE_WCSCMP = 0 || test $REPLACE_WCSCMP = 1])
 gl_WCHAR_MODULE_INDICATOR([wcscmp])
 
 Makefile.am:
-- 
2.34.1

From 9bf6bcc74b4caf4d74bab8d98a4e00f761f2e5ca Mon Sep 17 00:00:00 2001
From: Bruno Haible <[email protected]>
Date: Wed, 19 Apr 2023 02:14:44 +0200
Subject: [PATCH 2/2] wcscmp: Add tests.

* tests/test-wcscmp.c: New file, based on tests/unistr/test-strcmp.h.
* modules/wcscmp-tests: New file.
---
 ChangeLog            |   4 ++
 modules/wcscmp-tests |  12 +++++
 tests/test-wcscmp.c  | 122 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 138 insertions(+)
 create mode 100644 modules/wcscmp-tests
 create mode 100644 tests/test-wcscmp.c

diff --git a/ChangeLog b/ChangeLog
index bc02f2d5f5..a88dfc1d81 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2023-04-18  Bruno Haible  <[email protected]>
 
+	wcscmp: Add tests.
+	* tests/test-wcscmp.c: New file, based on tests/unistr/test-strcmp.h.
+	* modules/wcscmp-tests: New file.
+
 	wcscmp: Work around two ISO C compliance bugs on several platforms.
 	* lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP.
 	* lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters
diff --git a/modules/wcscmp-tests b/modules/wcscmp-tests
new file mode 100644
index 0000000000..839d3175c4
--- /dev/null
+++ b/modules/wcscmp-tests
@@ -0,0 +1,12 @@
+Files:
+tests/test-wcscmp.c
+tests/signature.h
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-wcscmp
+check_PROGRAMS += test-wcscmp
diff --git a/tests/test-wcscmp.c b/tests/test-wcscmp.c
new file mode 100644
index 0000000000..e117d3e319
--- /dev/null
+++ b/tests/test-wcscmp.c
@@ -0,0 +1,122 @@
+/* Test of wcscmp() function.
+   Copyright (C) 2010-2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <[email protected]>, 2023.  */
+
+#include <config.h>
+
+#include <wchar.h>
+
+#include "signature.h"
+SIGNATURE_CHECK (wcscmp, int, (const wchar_t *, const wchar_t *));
+
+#include "macros.h"
+
+int
+main (int argc, char *argv[])
+{
+  /* Test simple cases.  */
+  {
+    static const wchar_t input1[] = { 0 };
+    static const wchar_t input2[] = { 0 };
+    ASSERT (wcscmp (input1, input2) == 0);
+  }
+  {
+    static const wchar_t input1[] = { 0 };
+    static const wchar_t input2[] = { 'f', 'o', 'o', 0 };
+    ASSERT (wcscmp (input1, input2) < 0);
+    ASSERT (wcscmp (input2, input1) > 0);
+  }
+  {
+    static const wchar_t input1[] = { 'f', 'o', 'o', 0 };
+    static const wchar_t input2[] = { 'f', 'o', 'o', 0 };
+    ASSERT (wcscmp (input1, input2) == 0);
+  }
+  {
+    static const wchar_t input1[] = { 'f', 'o', 'o', 0 };
+    static const wchar_t input2[] = { 'b', 'a', 'r', 0 };
+    ASSERT (wcscmp (input1, input2) > 0);
+    ASSERT (wcscmp (input2, input1) < 0);
+  }
+  {
+    static const wchar_t input1[] = { 'f', 'o', 'o', 0 };
+    static const wchar_t input2[] = { 'f', 'o', 'o', 'b', 'a', 'r', 0 };
+    ASSERT (wcscmp (input1, input2) < 0);
+    ASSERT (wcscmp (input2, input1) > 0);
+  }
+  {
+    static const wchar_t input1[] = { 'o', 'o', 'm', 'p', 'h', 0 };
+    static const wchar_t input2[] = { 'o', 'o', 'p', 's', 0 };
+    ASSERT (wcscmp (input1, input2) < 0);
+    ASSERT (wcscmp (input2, input1) > 0);
+  }
+
+  /* ISO C requires wcscmp to work with all wchar_t values.
+     ISO C 17 § 7.29.4.4 says:
+       "Unless explicitly stated otherwise, the functions described in this
+        subclause order two wide characters the same way as two integers of
+        the underlying integer type designated by wchar_t."  */
+  {
+    static const wchar_t input1[] = { (wchar_t) 0x76547654, 0 };
+    static const wchar_t input2[] = { (wchar_t) 0x9abc9abc, 0 };
+    if ((wchar_t)-1 < 0)
+      {
+        /* wchar_t is signed.  */
+        ASSERT (wcscmp (input1, input2) > 0);
+        ASSERT (wcscmp (input2, input1) < 0);
+      }
+    else
+      {
+        /* wchar_t is unsigned.  */
+        ASSERT (wcscmp (input1, input2) < 0);
+        ASSERT (wcscmp (input2, input1) > 0);
+      }
+  }
+  {
+    static const wchar_t input1[] = { (wchar_t) 0x9abc9abc, 0 };
+    static const wchar_t input2[] = { (wchar_t) 0x9bdf9bdf, 0 };
+    ASSERT (wcscmp (input1, input2) < 0);
+    ASSERT (wcscmp (input2, input1) > 0);
+  }
+
+  /* Comparing a negative wchar_t value against a null wchar_t.
+     ISO C 17 § 7.29.4.4.1 says:
+       "The wcscmp function compares the wide string pointed to by s1 to
+        the wide string pointed to by s2."
+     ISO C 17 § 7.1.1 defines the term "wide string":
+       "A wide string is a contiguous sequence of wide characters terminated
+        by and including the first null wide character."
+     This means that the comparison extends up to and *including* the first
+     null wchar_t.  */
+  {
+    static const wchar_t input1[] = { (wchar_t) 'x', 0 };
+    static const wchar_t input2[] = { (wchar_t) 'x', (wchar_t) 0x9abc9abc, 0 };
+    if ((wchar_t)-1 < 0)
+      {
+        /* wchar_t is signed.  */
+        ASSERT (wcscmp (input1, input2) > 0);
+        ASSERT (wcscmp (input2, input1) < 0);
+      }
+    else
+      {
+        /* wchar_t is unsigned.  */
+        ASSERT (wcscmp (input1, input2) < 0);
+        ASSERT (wcscmp (input2, input1) > 0);
+      }
+  }
+
+  return 0;
+}
-- 
2.34.1

Reply via email to