These two patches work around an ISO C compliance bug in wcscmp() that affects several platforms and a second bug that is specific to AIX, and add a unit test for the function.
The point is that in ISO C, a "wide character" is any wchar_t value. A "wide character" is not constrained to the range 0..INT_MAX. For the precise reasoning, see <https://www.openwall.com/lists/musl/2023/04/18/5>. In particular, the module 'wcscmp' is no longer obsolete. 2023-04-18 Bruno Haible <[email protected]> wcscmp: Add tests. * tests/test-wcscmp.c: New file, based on tests/unistr/test-strcmp.h. * modules/wcscmp-tests: New file. wcscmp: Work around two ISO C compliance bugs on several platforms. * lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP. * lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters are in the range 0..INT_MAX. * m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide characters. Set REPLACE_WCSCMP. * m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP. * modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP. * modules/wcscmp (Status, Notice): Un-obsolete this module. (configure.ac): Consider REPLACE_WCSCMP. * doc/posix-functions/wcscmp.texi: Mention the two bugs.
From 4b440d3568b01dd9acd5242bea8b63fc43428f5a Mon Sep 17 00:00:00 2001 From: Bruno Haible <[email protected]> Date: Wed, 19 Apr 2023 02:14:09 +0200 Subject: [PATCH 1/2] wcscmp: Work around two ISO C compliance bugs on several platforms. * lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP. * lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters are in the range 0..INT_MAX. * m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide characters. Set REPLACE_WCSCMP. * m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP. * modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP. * modules/wcscmp (Status, Notice): Un-obsolete this module. (configure.ac): Consider REPLACE_WCSCMP. * doc/posix-functions/wcscmp.texi: Mention the two bugs. --- ChangeLog | 14 ++++++++ doc/posix-functions/wcscmp.texi | 8 +++++ lib/wchar.in.h | 14 ++++++-- lib/wcscmp-impl.h | 5 +-- m4/wchar_h.m4 | 3 +- m4/wcscmp.m4 | 58 ++++++++++++++++++++++++++++++++- modules/wchar | 1 + modules/wcscmp | 9 ++--- 8 files changed, 99 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 17596c4b23..bc02f2d5f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2023-04-18 Bruno Haible <[email protected]> + + wcscmp: Work around two ISO C compliance bugs on several platforms. + * lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP. + * lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters + are in the range 0..INT_MAX. + * m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide + characters. Set REPLACE_WCSCMP. + * m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP. + * modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP. + * modules/wcscmp (Status, Notice): Un-obsolete this module. + (configure.ac): Consider REPLACE_WCSCMP. + * doc/posix-functions/wcscmp.texi: Mention the two bugs. + 2023-04-18 Bruno Haible <[email protected]> wmemcmp: Add tests. 
diff --git a/doc/posix-functions/wcscmp.texi b/doc/posix-functions/wcscmp.texi index 4c4de8c6dc..bc64d28f56 100644 --- a/doc/posix-functions/wcscmp.texi +++ b/doc/posix-functions/wcscmp.texi @@ -8,6 +8,14 @@ Portability problems fixed by Gnulib: @itemize +@item +This function compares the wide characters as if they were unsigned, although +@code{wchar_t} is signed, on some platforms: +glibc 2.14.1 on x86 or x86_64, musl libc 1.2.3, macOS 12.5, FreeBSD 13.2, NetBSD 9.0, OpenBSD 7.2, Solaris 11.4. +@item +This function may return a wrong result if the two arguments are of different +length, on some platforms: +AIX 7.2 in 64-bit mode. @end itemize Portability problems not fixed by Gnulib: diff --git a/lib/wchar.in.h b/lib/wchar.in.h index 6a5b18d39d..c347256368 100644 --- a/lib/wchar.in.h +++ b/lib/wchar.in.h @@ -938,11 +938,21 @@ _GL_WARN_ON_USE (wcsncat, "wcsncat is unportable - " /* Compare S1 and S2. */ #if @GNULIB_WCSCMP@ -# if !@HAVE_WCSCMP@ +# if @REPLACE_WCSCMP@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# undef wcscmp +# define wcscmp rpl_wcscmp +# endif +_GL_FUNCDECL_RPL (wcscmp, int, (const wchar_t *s1, const wchar_t *s2) + _GL_ATTRIBUTE_PURE); +_GL_CXXALIAS_RPL (wcscmp, int, (const wchar_t *s1, const wchar_t *s2)); +# else +# if !@HAVE_WCSCMP@ _GL_FUNCDECL_SYS (wcscmp, int, (const wchar_t *s1, const wchar_t *s2) _GL_ATTRIBUTE_PURE); -# endif +# endif _GL_CXXALIAS_SYS (wcscmp, int, (const wchar_t *s1, const wchar_t *s2)); +# endif # if __GLIBC__ >= 2 _GL_CXXALIASWARN (wcscmp); # endif diff --git a/lib/wcscmp-impl.h b/lib/wcscmp-impl.h index ba96db4f77..bc17c75d94 100644 --- a/lib/wcscmp-impl.h +++ b/lib/wcscmp-impl.h @@ -24,8 +24,9 @@ wcscmp (const wchar_t *s1, const wchar_t *s2) wchar_t wc2 = *s2++; if (wc1 != (wchar_t)'\0' && wc1 == wc2) continue; - /* Note that wc1 and wc2 each have at most 31 bits. */ - return (int)wc1 - (int)wc2; + /* ISO C requires wcscmp to work with all wchar_t values. 
+ We cannot assume that wc1 and wc2 are in the range 0..INT_MAX. */ + return _GL_CMP (wc1, wc2); /* > 0 if wc1 > wc2, < 0 if wc1 < wc2, = 0 if wc1 and wc2 are both '\0'. */ } diff --git a/m4/wchar_h.m4 b/m4/wchar_h.m4 index dfd154f8e9..b9fa7cec84 100644 --- a/m4/wchar_h.m4 +++ b/m4/wchar_h.m4 @@ -7,7 +7,7 @@ dnl Written by Eric Blake. -# wchar_h.m4 serial 58 +# wchar_h.m4 serial 59 AC_DEFUN_ONCE([gl_WCHAR_H], [ @@ -253,6 +253,7 @@ AC_DEFUN([gl_WCHAR_H_DEFAULTS] REPLACE_WCWIDTH=0; AC_SUBST([REPLACE_WCWIDTH]) REPLACE_WCSWIDTH=0; AC_SUBST([REPLACE_WCSWIDTH]) REPLACE_WCSFTIME=0; AC_SUBST([REPLACE_WCSFTIME]) + REPLACE_WCSCMP=0; AC_SUBST([REPLACE_WCSCMP]) REPLACE_WCSSTR=0; AC_SUBST([REPLACE_WCSSTR]) REPLACE_WCSTOK=0; AC_SUBST([REPLACE_WCSTOK]) REPLACE_WMEMCMP=0; AC_SUBST([REPLACE_WMEMCMP]) diff --git a/m4/wcscmp.m4 b/m4/wcscmp.m4 index 61fd800e4d..a3000ed275 100644 --- a/m4/wcscmp.m4 +++ b/m4/wcscmp.m4 @@ -1,4 +1,4 @@ -# wcscmp.m4 serial 2 +# wcscmp.m4 serial 3 dnl Copyright (C) 2011-2023 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -10,5 +10,61 @@ AC_DEFUN([gl_FUNC_WCSCMP] AC_CHECK_FUNCS_ONCE([wcscmp]) if test $ac_cv_func_wcscmp = no; then HAVE_WCSCMP=0 + else + AC_CACHE_CHECK([whether wcscmp works for all wide characters], + [gl_cv_func_wcscmp_works], + [AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ + #include <wchar.h> + int main () + { + int result = 0; + { /* This test fails on glibc < 2.15, musl libc 1.2.3, macOS 12.5, + FreeBSD 13.2, NetBSD 9.0, OpenBSD 7.2, Solaris 11.4. */ + wchar_t a[2] = { (wchar_t) 0x76547654, 0 }; + wchar_t b[2] = { (wchar_t) 0x9abc9abc, 0 }; + int cmp = wcscmp (a, b); + if (!((wchar_t)-1 < 0 ? cmp > 0 : cmp < 0)) + result |= 1; + } + { /* This test fails on AIX in 64-bit mode. */ + wchar_t c[2] = { (wchar_t) 'x', 0 }; + wchar_t d[3] = { (wchar_t) 'x', (wchar_t) 0x9abc9abc, 0 }; + int cmp = wcscmp (c, d); + if (!((wchar_t)-1 < 0 ? 
cmp > 0 : cmp < 0)) + result |= 2; + } + return result; + } + ]]) + ], + [gl_cv_func_wcscmp_works=yes], + [gl_cv_func_wcscmp_works=no], + [case "$host_os" in + # Guess no on glibc versions < 2.15. + *-gnu* | gnu*) + AC_EGREP_CPP([Unlucky], + [ +#include <features.h> +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ < 15) + Unlucky GNU user + #endif +#endif + ], + [gl_cv_func_wcscmp_works="guessing no"], + [gl_cv_func_wcscmp_works="guessing yes"]) + ;; + # Guess no on musl systems. + *-musl* | midipix*) gl_cv_func_wcscmp_works="guessing no" ;; + # If we don't know, obey --enable-cross-guesses. + *) gl_cv_func_wcscmp_works="$gl_cross_guess_normal" ;; + esac + ]) + ]) + case "$gl_cv_func_wcscmp_works" in + *yes) ;; + *) REPLACE_WCSCMP=1 ;; + esac fi ]) diff --git a/modules/wchar b/modules/wchar index 180c94309c..88b442b525 100644 --- a/modules/wchar +++ b/modules/wchar @@ -142,6 +142,7 @@ wchar.h: wchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) -e 's|@''REPLACE_WCWIDTH''@|$(REPLACE_WCWIDTH)|g' \ -e 's|@''REPLACE_WCSWIDTH''@|$(REPLACE_WCSWIDTH)|g' \ -e 's|@''REPLACE_WCSFTIME''@|$(REPLACE_WCSFTIME)|g' \ + -e 's|@''REPLACE_WCSCMP''@|$(REPLACE_WCSCMP)|g' \ -e 's|@''REPLACE_WCSSTR''@|$(REPLACE_WCSSTR)|g' \ -e 's|@''REPLACE_WCSTOK''@|$(REPLACE_WCSTOK)|g' \ -e 's|@''REPLACE_WMEMCMP''@|$(REPLACE_WMEMCMP)|g' \ diff --git a/modules/wcscmp b/modules/wcscmp index 1b4fde704a..6df70f0543 100644 --- a/modules/wcscmp +++ b/modules/wcscmp @@ -1,12 +1,6 @@ Description: wcscmp() function: compare two wide strings. -Status: -obsolete - -Notice: -This module is obsolete. - Files: lib/wcscmp.c lib/wcscmp-impl.h @@ -17,7 +11,8 @@ wchar configure.ac: gl_FUNC_WCSCMP -gl_CONDITIONAL([GL_COND_OBJ_WCSCMP], [test $HAVE_WCSCMP = 0]) +gl_CONDITIONAL([GL_COND_OBJ_WCSCMP], + [test $HAVE_WCSCMP = 0 || test $REPLACE_WCSCMP = 1]) gl_WCHAR_MODULE_INDICATOR([wcscmp]) Makefile.am: -- 2.34.1
From 9bf6bcc74b4caf4d74bab8d98a4e00f761f2e5ca Mon Sep 17 00:00:00 2001 From: Bruno Haible <[email protected]> Date: Wed, 19 Apr 2023 02:14:44 +0200 Subject: [PATCH 2/2] wcscmp: Add tests. * tests/test-wcscmp.c: New file, based on tests/unistr/test-strcmp.h. * modules/wcscmp-tests: New file. --- ChangeLog | 4 ++ modules/wcscmp-tests | 12 +++++ tests/test-wcscmp.c | 122 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 modules/wcscmp-tests create mode 100644 tests/test-wcscmp.c diff --git a/ChangeLog b/ChangeLog index bc02f2d5f5..a88dfc1d81 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2023-04-18 Bruno Haible <[email protected]> + wcscmp: Add tests. + * tests/test-wcscmp.c: New file, based on tests/unistr/test-strcmp.h. + * modules/wcscmp-tests: New file. + wcscmp: Work around two ISO C compliance bugs on several platforms. * lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP. * lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters diff --git a/modules/wcscmp-tests b/modules/wcscmp-tests new file mode 100644 index 0000000000..839d3175c4 --- /dev/null +++ b/modules/wcscmp-tests @@ -0,0 +1,12 @@ +Files: +tests/test-wcscmp.c +tests/signature.h +tests/macros.h + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-wcscmp +check_PROGRAMS += test-wcscmp diff --git a/tests/test-wcscmp.c b/tests/test-wcscmp.c new file mode 100644 index 0000000000..e117d3e319 --- /dev/null +++ b/tests/test-wcscmp.c @@ -0,0 +1,122 @@ +/* Test of wcscmp() function. + Copyright (C) 2010-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <[email protected]>, 2023. */ + +#include <config.h> + +#include <wchar.h> + +#include "signature.h" +SIGNATURE_CHECK (wcscmp, int, (const wchar_t *, const wchar_t *)); + +#include "macros.h" + +int +main (int argc, char *argv[]) +{ + /* Test simple cases. */ + { + static const wchar_t input1[] = { 0 }; + static const wchar_t input2[] = { 0 }; + ASSERT (wcscmp (input1, input2) == 0); + } + { + static const wchar_t input1[] = { 0 }; + static const wchar_t input2[] = { 'f', 'o', 'o', 0 }; + ASSERT (wcscmp (input1, input2) < 0); + ASSERT (wcscmp (input2, input1) > 0); + } + { + static const wchar_t input1[] = { 'f', 'o', 'o', 0 }; + static const wchar_t input2[] = { 'f', 'o', 'o', 0 }; + ASSERT (wcscmp (input1, input2) == 0); + } + { + static const wchar_t input1[] = { 'f', 'o', 'o', 0 }; + static const wchar_t input2[] = { 'b', 'a', 'r', 0 }; + ASSERT (wcscmp (input1, input2) > 0); + ASSERT (wcscmp (input2, input1) < 0); + } + { + static const wchar_t input1[] = { 'f', 'o', 'o', 0 }; + static const wchar_t input2[] = { 'f', 'o', 'o', 'b', 'a', 'r', 0 }; + ASSERT (wcscmp (input1, input2) < 0); + ASSERT (wcscmp (input2, input1) > 0); + } + { + static const wchar_t input1[] = { 'o', 'o', 'm', 'p', 'h', 0 }; + static const wchar_t input2[] = { 'o', 'o', 'p', 's', 0 }; + ASSERT (wcscmp (input1, input2) < 0); + ASSERT (wcscmp (input2, input1) > 0); + } + + /* ISO C requires wcscmp to work with all wchar_t values. 
+ ISO C 17 § 7.29.4.4 says: + "Unless explicitly stated otherwise, the functions described in this + subclause order two wide characters the same way as two integers of + the underlying integer type designated by wchar_t." */ + { + static const wchar_t input1[] = { (wchar_t) 0x76547654, 0 }; + static const wchar_t input2[] = { (wchar_t) 0x9abc9abc, 0 }; + if ((wchar_t)-1 < 0) + { + /* wchar_t is signed. */ + ASSERT (wcscmp (input1, input2) > 0); + ASSERT (wcscmp (input2, input1) < 0); + } + else + { + /* wchar_t is unsigned. */ + ASSERT (wcscmp (input1, input2) < 0); + ASSERT (wcscmp (input2, input1) > 0); + } + } + { + static const wchar_t input1[] = { (wchar_t) 0x9abc9abc, 0 }; + static const wchar_t input2[] = { (wchar_t) 0x9bdf9bdf, 0 }; + ASSERT (wcscmp (input1, input2) < 0); + ASSERT (wcscmp (input2, input1) > 0); + } + + /* Comparing a negative wchar_t value against a null wchar_t. + ISO C 17 § 7.29.4.4.1 says: + "The wcscmp function compares the wide string pointed to by s1 to + the wide string pointed to by s2." + ISO C 17 § 7.1.1 defines the term "wide string": + "A wide string is a contiguous sequence of wide characters terminated + by and including the first null wide character." + This means that the comparison extends up to and *including* the first + null wchar_t. */ + { + static const wchar_t input1[] = { (wchar_t) 'x', 0 }; + static const wchar_t input2[] = { (wchar_t) 'x', (wchar_t) 0x9abc9abc, 0 }; + if ((wchar_t)-1 < 0) + { + /* wchar_t is signed. */ + ASSERT (wcscmp (input1, input2) > 0); + ASSERT (wcscmp (input2, input1) < 0); + } + else + { + /* wchar_t is unsigned. */ + ASSERT (wcscmp (input1, input2) < 0); + ASSERT (wcscmp (input2, input1) > 0); + } + } + + return 0; +} -- 2.34.1
