This should help merge changes from Gawk, which always uses the
char32_t API though that’s sometimes implemented with the wchar_t
API even on platforms where wchar_t and char32_t act differently.
The idea is to use char32_t uniformly in both the dfa and regex
modules, so that they get consistent answers on all platforms.
* lib/dfa.c, lib/localeinfo.c, lib/localeinfo.h: If GAWK, do not
include <wctype.h> or redefine the Gnulib char32_t types and
functions to be wchar.h and wctype.h functions or define mbszero
and streq, as I think I have a better way to do this with Gawk
that is less intrusive here; instead, always include <uchar.h>.
* lib/dfa.c: Do not include <wchar.h>.  Include "gettext.h" before
including "xalloc.h" and "localeinfo.h", as Gnulib doesn’t care
about the order and this works better with Gawk’s way of overriding Gnulib.
(parse_bracket_exp): Use && instead of &; either is correct and
both are equally fast nowadays but & triggers a warning in some
Gawk compiles.
* lib/dfa.h (_GL_ATTRIBUTE_MALLOC, _GL_ATTRIBUTE_DEALLOC)
(_GL_ATTRIBUTE_DEALLOC_FREE)
(_GL_ATTRIBUTE_RETURNS_NONNULL) [!_GL_ATTRIBUTE_MALLOC]:
Remove, as Gawk’s custom.h can define them.
* lib/localeinfo.c: Go back to using <verify.h> and ‘verify’
instead of using static_assert which Gawk can’t easily use because
it does not use Gnulib’s assert-h module.
* lib/localeinfo.h: Do not include <limits.h>, avoiding some
namespace pollution.
(struct localeinfo): Use (unsigned char) -1 instead of UCHAR_MAX
to avoid the need to include <limits.h>.
* modules/dfa (Depends-on): Remove wchar-h.
---
 ChangeLog        | 30 ++++++++++++++++++++++++++++++
 lib/dfa.c        | 25 +++++--------------------
 lib/dfa.h        | 11 ++---------
 lib/localeinfo.c | 21 ++++++---------------
 lib/localeinfo.h | 14 +++-----------
 modules/dfa      |  1 -
 6 files changed, 46 insertions(+), 56 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6a07acdad5..13cdd0d1fb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,35 @@
 2026-04-24  Paul Eggert  <[email protected]>
 
+       dfa: always use char32_t not wchar_t
+       This should help merge changes from Gawk, which always uses the
+       char32_t API though that’s sometimes implemented with the wchar_t
+       API even on platforms where wchar_t and char32_t act differently.
+       The idea is to use char32_t uniformly in both the dfa and regex
+       modules, so that they get consistent answers on all platforms.
+       * lib/dfa.c, lib/localeinfo.c, lib/localeinfo.h: If GAWK, do not
+       include <wctype.h> or redefine the Gnulib char32_t types and
+       functions to be wchar.h and wctype.h functions or define mbszero
+       and streq, as I think I have a better way to do this with Gawk
+       that is less intrusive here; instead, always include <uchar.h>.
+       * lib/dfa.c: Do not include <wchar.h>.  Include "gettext.h" before
+       including "xalloc.h" and "localeinfo.h", as Gnulib doesn’t care
+       about the order and this works better with Gawk’s way of overriding Gnulib.
+       (parse_bracket_exp): Use && instead of &; either is correct and
+       both are equally fast nowadays but & triggers a warning in some
+       Gawk compiles.
+       * lib/dfa.h (_GL_ATTRIBUTE_MALLOC, _GL_ATTRIBUTE_DEALLOC)
+       (_GL_ATTRIBUTE_DEALLOC_FREE)
+       (_GL_ATTRIBUTE_RETURNS_NONNULL) [!_GL_ATTRIBUTE_MALLOC]:
+       Remove, as Gawk’s custom.h can define them.
+       * lib/localeinfo.c: Go back to using <verify.h> and ‘verify’
+       instead of using static_assert which Gawk can’t easily use because
+       it does not use Gnulib’s assert-h module.
+       * lib/localeinfo.h: Do not include <limits.h>, avoiding some
+       namespace pollution.
+       (struct localeinfo): Use (unsigned char) -1 instead of UCHAR_MAX
+       to avoid the need to include <limits.h>.
+       * modules/dfa (Depends-on): Remove wchar-h.
+
        uchar-h: <string.h> etc. namespace cleanup
        This is only a partial cleanup; to be cleaner we’d need to
        move declarations of Gnulib extensions like c32isalpha
diff --git a/lib/dfa.c b/lib/dfa.c
index 7bbe94405b..7719e80669 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -33,29 +33,14 @@
 #include <stdlib.h>
 #include <limits.h>
 #include <string.h>
-#include <wchar.h>
-
-#include "xalloc.h"
-#include "localeinfo.h"
 
 #include "gettext.h"
 #define _(msgid) dgettext (GNULIB_TEXT_DOMAIN, msgid)
 
-#if GAWK
-/* Use ISO C 99 API.  */
-# include <wctype.h>
-# define char32_t wchar_t
-# define mbrtoc32 mbrtowc
-# define c32rtomb wcrtomb
-# define c32tob wctob
-# define c32isprint iswprint
-# define c32isspace iswspace
-# define mbszero(p) memset (p, 0, sizeof (mbstate_t))
-# define streq(a, b) (strcmp (a, b) == 0)
-#else
-/* Use ISO C 11 + gnulib API.  */
-# include <uchar.h>
-#endif
+#include "xalloc.h"
+#include "localeinfo.h"
+
+#include <uchar.h>
 
 /* Pacify gcc -Wanalyzer-null-dereference in areas where GCC
    understandably cannot deduce that the input comes from a
@@ -1099,7 +1084,7 @@ parse_bracket_exp (struct dfa *dfa)
               if (wc != wc2 || wc == WEOF)
                 {
                   if (dfa->localeinfo.simple
-                      || (c_isdigit (c) & c_isdigit (c2)))
+                      || (c_isdigit (c) && c_isdigit (c2)))
                     {
                       for (int ci = c; ci <= c2; ci++)
                         if (dfa->syntax.case_fold && isalpha (ci))
diff --git a/lib/dfa.h b/lib/dfa.h
index f3454f0c0e..71a944176e 100644
--- a/lib/dfa.h
+++ b/lib/dfa.h
@@ -19,7 +19,8 @@
 #ifndef DFA_H_
 #define DFA_H_
 
-/* This file uses _Noreturn, _GL_ATTRIBUTE_DEALLOC, _GL_ATTRIBUTE_MALLOC,
+/* This file uses _Noreturn, _GL_ATTRIBUTE_DEALLOC,
+   _GL_ATTRIBUTE_DEALLOC_FREE, _GL_ATTRIBUTE_MALLOC,
    _GL_ATTRIBUTE_PURE, _GL_ATTRIBUTE_RETURNS_NONNULL.  */
 #if !_GL_CONFIG_H_INCLUDED
  #error "Please include config.h first."
@@ -49,14 +50,6 @@ struct dfamust
 /* The dfa structure. It is completely opaque. */
 struct dfa;
 
-/* Needed when Gnulib is not used.  */
-#ifndef _GL_ATTRIBUTE_MALLOC
-# define _GL_ATTRIBUTE_MALLOC
-# define _GL_ATTRIBUTE_DEALLOC(f, i)
-# define _GL_ATTRIBUTE_DEALLOC_FREE
-# define _GL_ATTRIBUTE_RETURNS_NONNULL
-#endif
-
 /* Entry points. */
 
 /* Allocate a struct dfa.  The struct dfa is completely opaque.
diff --git a/lib/localeinfo.c b/lib/localeinfo.c
index 89a169a462..3d1ee529a7 100644
--- a/lib/localeinfo.c
+++ b/lib/localeinfo.c
@@ -21,25 +21,16 @@
 
 #include <localeinfo.h>
 
+#include <verify.h>
+
 #include <limits.h>
 #include <locale.h>
 #include <stdlib.h>
 #include <string.h>
-#if GAWK
-/* Use ISO C 99 API.  */
-# include <wctype.h>
-# define char32_t wchar_t
-# define mbrtoc32 mbrtowc
-# define c32tolower towlower
-# define c32toupper towupper
-# define mbszero(p) memset (p, 0, sizeof (mbstate_t))
-#else
-/* Use ISO C 11 + gnulib API.  */
-# include <uchar.h>
-#endif
+#include <uchar.h>
 
 /* The sbclen implementation relies on this.  */
-static_assert (MB_LEN_MAX <= SCHAR_MAX);
+verify (MB_LEN_MAX <= SCHAR_MAX);
 
 /* Return true if the locale uses UTF-8.  */
 
@@ -130,8 +121,8 @@ static unsigned short int const lonesome_lower[] =
 
 /* Verify that the worst case fits.  This is 1 for towupper, 1 for
    towlower, and 1 for each entry in LONESOME_LOWER.  */
-static_assert (1 + 1 + sizeof lonesome_lower / sizeof *lonesome_lower
-               <= CASE_FOLDED_BUFSIZE);
+verify (1 + 1 + sizeof lonesome_lower / sizeof *lonesome_lower
+        <= CASE_FOLDED_BUFSIZE);
 
 /* Find the characters equal to C after case-folding, other than C
    itself, and store them into FOLDED.  Return the number of characters
diff --git a/lib/localeinfo.h b/lib/localeinfo.h
index 7e6ecbc014..3bc84564be 100644
--- a/lib/localeinfo.h
+++ b/lib/localeinfo.h
@@ -17,15 +17,7 @@
 
 /* Written by Paul Eggert.  */
 
-#include <limits.h>
-#include <wchar.h>
-#if GAWK
-/* Use ISO C 99 API.  */
-# define char32_t wchar_t
-#else
-/* Use ISO C 11 + gnulib API.  */
-# include <uchar.h>
-#endif
+#include <uchar.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -50,13 +42,13 @@ struct localeinfo
      single-byte character, -1 if B is an encoding error, and -2 if B
      is the leading byte of a multibyte character that contains more
      than one byte.  */
-  signed char sbclen[UCHAR_MAX + 1];
+  signed char sbclen[(unsigned char) -1 + 1];
 
   /* An array indexed by byte values B that contains the corresponding
      32-bit wide character (if any) for B if sbclen[B] == 1.  WEOF means
      the byte is not a valid single-byte character, i.e., sbclen[B] == -1
      or -2.  */
-  wint_t sbctowc[UCHAR_MAX + 1];
+  wint_t sbctowc[(unsigned char) -1 + 1];
 };
 
 extern void init_localeinfo (struct localeinfo *);
diff --git a/modules/dfa b/modules/dfa
index 5f9610109e..f06d747a6d 100644
--- a/modules/dfa
+++ b/modules/dfa
@@ -38,7 +38,6 @@ uchar-h
 # to enable it.
 #uchar-h-c23
 verify
-wchar-h
 xalloc
 xalloc-die
 
-- 
2.53.0


Reply via email to