On 11/6/21 16:10, Bruno Haible wrote:
> It looks like it's caused by the automatic pull-in from glibc.
> I would guess that the type 'uint32_t' is visible from glibc-internal
> headers in the glibc builds, but not through POSIX headers.

I think uint32_t is visible via stdint.h (via regex_internal.h) so that's not the issue.

The underlying problem will take a bit more time to fix. Basically, we should remove some of the Gnulib-specific stuff, which was written back when Gnulib didn't have substitutes for wctype.h etc. In the meantime, it's better to break the link between glibc and Gnulib for this file, so I installed the attached patch as a workaround.
From b9b1bd31378af853378f43047beca0c4174a67d5 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 7 Nov 2021 23:31:08 -0800
Subject: [PATCH] regex: break regcomp.c link with glibc

Problem reported by Bruno Haible in:
https://lists.gnu.org/r/bug-gnulib/2021-11/msg00005.html
* config/srclist.txt: Comment out regcomp.c for now.
* lib/regcomp.c: Revert previous change.
---
 ChangeLog          |   8 +
 config/srclist.txt |   2 +-
 lib/regcomp.c      | 464 ++++++++++++++++++++++-----------------------
 3 files changed, 232 insertions(+), 242 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6aaa53991..bde24b951 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2021-11-07  Paul Eggert  <egg...@cs.ucla.edu>
+
+	regex: break regcomp.c link with glibc
+	Problem reported by Bruno Haible in:
+	https://lists.gnu.org/r/bug-gnulib/2021-11/msg00005.html
+	* config/srclist.txt: Comment out regcomp.c for now.
+	* lib/regcomp.c: Revert previous change.
+
 2021-10-24  Bruno Haible  <br...@clisp.org>
 
 	string: Avoid syntax error re strdup in string.in.h (regr. 2021-09-07).
diff --git a/config/srclist.txt b/config/srclist.txt
index 83e251def..e36501239 100644
--- a/config/srclist.txt
+++ b/config/srclist.txt
@@ -65,7 +65,7 @@ $LIBCSRC malloc/scratch_buffer_grow_preserve.c	lib/malloc
 $LIBCSRC malloc/scratch_buffer_set_array_size.c	lib/malloc
 $LIBCSRC include/intprops.h             lib
 $LIBCSRC misc/sys/cdefs.h		lib
-$LIBCSRC posix/regcomp.c		lib
+#$LIBCSRC posix/regcomp.c		lib
 $LIBCSRC posix/regex.c			lib
 $LIBCSRC posix/regex.h			lib
 $LIBCSRC posix/regex_internal.c		lib
diff --git a/lib/regcomp.c b/lib/regcomp.c
index e2b5c2807..887e5b506 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -2831,277 +2831,262 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
 }
 #endif /* not _LIBC */
 
-#ifdef _LIBC
-/* Local function for parse_bracket_exp used in _LIBC environment.
-   Seek the collating symbol entry corresponding to NAME.
-   Return the index of the symbol in the SYMB_TABLE,
-   or -1 if not found.  */
+/* This function parse bracket expression like "[abc]", "[a-c]",
+   "[[.a-a.]]" etc.  */
 
-static inline int32_t
-__attribute__ ((always_inline))
-seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
-			     const int32_t *symb_table, int32_t table_size,
-			     const unsigned char *extra)
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+		   reg_syntax_t syntax, reg_errcode_t *err)
 {
-  int32_t elem;
-
-  for (elem = 0; elem < table_size; elem++)
-    if (symb_table[2 * elem] != 0)
-      {
-	int32_t idx = symb_table[2 * elem + 1];
-	/* Skip the name of collating element name.  */
-	idx += 1 + extra[idx];
-	if (/* Compare the length of the name.  */
-	    name_len == extra[idx]
-	    /* Compare the name.  */
-	    && memcmp (name, &extra[idx + 1], name_len) == 0)
-	  /* Yep, this is the entry.  */
-	  return elem;
-      }
-  return -1;
-}
+#ifdef _LIBC
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
 
-/* Local function for parse_bracket_exp used in _LIBC environment.
-   Look up the collation sequence value of BR_ELEM.
-   Return the value if succeeded, UINT_MAX otherwise.  */
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE,
+     or -1 if not found.  */
 
-static inline unsigned int
-__attribute__ ((always_inline))
-lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
-				 const unsigned char *collseqmb,
-				 const char *collseqwc, int32_t table_size,
-				 const int32_t *symb_table,
-				 const unsigned char *extra)
-{
-  if (br_elem->type == SB_CHAR)
-    {
-      /* if (MB_CUR_MAX == 1) */
-      if (nrules == 0)
-	return collseqmb[br_elem->opr.ch];
-      else
-	{
-	  wint_t wc = __btowc (br_elem->opr.ch);
-	  return __collseq_table_lookup (collseqwc, wc);
-	}
-    }
-  else if (br_elem->type == MB_CHAR)
+  auto inline int32_t
+  __attribute__ ((always_inline))
+  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
     {
-      if (nrules != 0)
-	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+      int32_t elem;
+
+      for (elem = 0; elem < table_size; elem++)
+	if (symb_table[2 * elem] != 0)
+	  {
+	    int32_t idx = symb_table[2 * elem + 1];
+	    /* Skip the name of collating element name.  */
+	    idx += 1 + extra[idx];
+	    if (/* Compare the length of the name.  */
+		name_len == extra[idx]
+		/* Compare the name.  */
+		&& memcmp (name, &extra[idx + 1], name_len) == 0)
+	      /* Yep, this is the entry.  */
+	      return elem;
+	  }
+      return -1;
     }
-  else if (br_elem->type == COLL_SYM)
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  auto inline unsigned int
+  __attribute__ ((always_inline))
+  lookup_collation_sequence_value (bracket_elem_t *br_elem)
     {
-      size_t sym_name_len = strlen ((char *) br_elem->opr.name);
-      if (nrules != 0)
+      if (br_elem->type == SB_CHAR)
 	{
-	  int32_t elem, idx;
-	  elem = seek_collating_symbol_entry (br_elem->opr.name,
-					      sym_name_len,
-					      symb_table, table_size,
-					      extra);
-	  if (elem != -1)
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    return collseqmb[br_elem->opr.ch];
+	  else
 	    {
-	      /* We found the entry.  */
-	      idx = symb_table[2 * elem + 1];
-	      /* Skip the name of collating element name.  */
-	      idx += 1 + extra[idx];
-	      /* Skip the byte sequence of the collating element.  */
-	      idx += 1 + extra[idx];
-	      /* Adjust for the alignment.  */
-	      idx = (idx + 3) & ~3;
-	      /* Skip the multibyte collation sequence value.  */
-	      idx += sizeof (unsigned int);
-	      /* Skip the wide char sequence of the collating element.  */
-	      idx += sizeof (unsigned int) *
-		(1 + *(unsigned int *) (extra + idx));
-	      /* Return the collation sequence value.  */
-	      return *(unsigned int *) (extra + idx);
+	      wint_t wc = __btowc (br_elem->opr.ch);
+	      return __collseq_table_lookup (collseqwc, wc);
 	    }
-	  else if (sym_name_len == 1)
+	}
+      else if (br_elem->type == MB_CHAR)
+	{
+	  if (nrules != 0)
+	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+	}
+      else if (br_elem->type == COLL_SYM)
+	{
+	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+	  if (nrules != 0)
 	    {
-	      /* No valid character.  Match it as a single byte
-		 character.  */
-	      return collseqmb[br_elem->opr.name[0]];
+	      int32_t elem, idx;
+	      elem = seek_collating_symbol_entry (br_elem->opr.name,
+						  sym_name_len);
+	      if (elem != -1)
+		{
+		  /* We found the entry.  */
+		  idx = symb_table[2 * elem + 1];
+		  /* Skip the name of collating element name.  */
+		  idx += 1 + extra[idx];
+		  /* Skip the byte sequence of the collating element.  */
+		  idx += 1 + extra[idx];
+		  /* Adjust for the alignment.  */
+		  idx = (idx + 3) & ~3;
+		  /* Skip the multibyte collation sequence value.  */
+		  idx += sizeof (unsigned int);
+		  /* Skip the wide char sequence of the collating element.  */
+		  idx += sizeof (unsigned int) *
+		    (1 + *(unsigned int *) (extra + idx));
+		  /* Return the collation sequence value.  */
+		  return *(unsigned int *) (extra + idx);
+		}
+	      else if (sym_name_len == 1)
+		{
+		  /* No valid character.  Match it as a single byte
+		     character.  */
+		  return collseqmb[br_elem->opr.name[0]];
+		}
 	    }
+	  else if (sym_name_len == 1)
+	    return collseqmb[br_elem->opr.name[0]];
 	}
-      else if (sym_name_len == 1)
-	return collseqmb[br_elem->opr.name[0]];
+      return UINT_MAX;
     }
-  return UINT_MAX;
-}
 
-/* Local function for parse_bracket_exp used in _LIBC environment.
-   Build the range expression which starts from START_ELEM, and ends
-   at END_ELEM.  The result are written to MBCSET and SBCSET.
-   RANGE_ALLOC is the allocated size of mbcset->range_starts, and
-   mbcset->range_ends, is a pointer argument since we may
-   update it.  */
-
-static inline reg_errcode_t
-__attribute__ ((always_inline))
-build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
-		 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
-		 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
-		 const unsigned char *collseqmb, const char *collseqwc,
-		 int32_t table_size, const int32_t *symb_table,
-		 const unsigned char *extra)
-{
-  unsigned int ch;
-  uint32_t start_collseq;
-  uint32_t end_collseq;
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The result are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+     mbcset->range_ends, is a pointer argument since we may
+     update it.  */
 
-  /* Equivalence Classes and Character Classes can't be a range
-     start/end.  */
-  if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
-                        || start_elem->type == CHAR_CLASS
-                        || end_elem->type == EQUIV_CLASS
-                        || end_elem->type == CHAR_CLASS))
-    return REG_ERANGE;
+  auto inline reg_errcode_t
+  __attribute__ ((always_inline))
+  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+	 start/end.  */
+      if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
+			    || start_elem->type == CHAR_CLASS
+			    || end_elem->type == EQUIV_CLASS
+			    || end_elem->type == CHAR_CLASS))
+	return REG_ERANGE;
 
-  /* FIXME: Implement rational ranges here, too.  */
-  start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
-						   table_size, symb_table, extra);
-  end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
-						 table_size, symb_table, extra);
-  /* Check start/end collation sequence values.  */
-  if (__glibc_unlikely (start_collseq == UINT_MAX
-                        || end_collseq == UINT_MAX))
-    return REG_ECOLLATE;
-  if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
-                        && start_collseq > end_collseq))
-    return REG_ERANGE;
+      /* FIXME: Implement rational ranges here, too.  */
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (__glibc_unlikely (start_collseq == UINT_MAX
+			    || end_collseq == UINT_MAX))
+	return REG_ECOLLATE;
+      if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+			    && start_collseq > end_collseq))
+	return REG_ERANGE;
 
-  /* Got valid collation sequence values, add them as a new entry.
-     However, if we have no collation elements, and the character set
-     is single byte, the single byte character set that we
-     build below suffices. */
-  if (nrules > 0 || dfa->mb_cur_max > 1)
-    {
-      /* Check the space of the arrays.  */
-      if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+      /* Got valid collation sequence values, add them as a new entry.
+	 However, if we have no collation elements, and the character set
+	 is single byte, the single byte character set that we
+	 build below suffices. */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
 	{
-	  /* There is not enough space, need realloc.  */
-	  uint32_t *new_array_start;
-	  uint32_t *new_array_end;
-	  int new_nranges;
+	  /* Check the space of the arrays.  */
+	  if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+	    {
+	      /* There is not enough space, need realloc.  */
+	      uint32_t *new_array_start;
+	      uint32_t *new_array_end;
+	      Idx new_nranges;
+
+	      /* +1 in case of mbcset->nranges is 0.  */
+	      new_nranges = 2 * mbcset->nranges + 1;
+	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+					    new_nranges);
+	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+					  new_nranges);
 
-	  /* +1 in case of mbcset->nranges is 0.  */
-	  new_nranges = 2 * mbcset->nranges + 1;
-	  new_array_start = re_realloc (mbcset->range_starts, uint32_t,
-					new_nranges);
-	  new_array_end = re_realloc (mbcset->range_ends, uint32_t,
-				      new_nranges);
+	      if (__glibc_unlikely (new_array_start == NULL
+				    || new_array_end == NULL))
+		return REG_ESPACE;
 
-          if (__glibc_unlikely (new_array_start == NULL
-                                || new_array_end == NULL))
-	    return REG_ESPACE;
+	      mbcset->range_starts = new_array_start;
+	      mbcset->range_ends = new_array_end;
+	      *range_alloc = new_nranges;
+	    }
 
-	  mbcset->range_starts = new_array_start;
-	  mbcset->range_ends = new_array_end;
-	  *range_alloc = new_nranges;
+	  mbcset->range_starts[mbcset->nranges] = start_collseq;
+	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
 	}
 
-      mbcset->range_starts[mbcset->nranges] = start_collseq;
-      mbcset->range_ends[mbcset->nranges++] = end_collseq;
-    }
-
-  /* Build the table for single byte characters.  */
-  for (ch = 0; ch < SBC_MAX; ch++)
-    {
-      uint32_t ch_collseq;
-      /* if (MB_CUR_MAX == 1) */
-      if (nrules == 0)
-	ch_collseq = collseqmb[ch];
-      else
-	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
-      if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
-	bitset_set (sbcset, ch);
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+	{
+	  uint32_t ch_collseq;
+	  /*
+	  if (MB_CUR_MAX == 1)
+	  */
+	  if (nrules == 0)
+	    ch_collseq = collseqmb[ch];
+	  else
+	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+	    bitset_set (sbcset, ch);
+	}
+      return REG_NOERROR;
     }
-  return REG_NOERROR;
-}
 
-/* Local function for parse_bracket_exp used in _LIBC environment.
-   Build the collating element which is represented by NAME.
-   The result are written to MBCSET and SBCSET.
-   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
-   pointer argument since we may update it.  */
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The result are written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+     pointer argument since we may update it.  */
 
-static inline reg_errcode_t
-__attribute__ ((always_inline))
-build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
-			int *coll_sym_alloc, const unsigned char *name,
-			uint32_t nrules, int32_t table_size,
-			const int32_t *symb_table, const unsigned char *extra)
-{
-  int32_t elem, idx;
-  size_t name_len = strlen ((const char *) name);
-  if (nrules != 0)
+  auto inline reg_errcode_t
+  __attribute__ ((always_inline))
+  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+			  Idx *coll_sym_alloc, const unsigned char *name)
     {
-      elem = seek_collating_symbol_entry (name, name_len, symb_table,
-					  table_size, extra);
-      if (elem != -1)
-	{
-	  /* We found the entry.  */
-	  idx = symb_table[2 * elem + 1];
-	  /* Skip the name of collating element name.  */
-	  idx += 1 + extra[idx];
-	}
-      else if (name_len == 1)
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
 	{
-	  /* No valid character, treat it as a normal
-	     character.  */
-	  bitset_set (sbcset, name[0]);
-	  return REG_NOERROR;
-	}
-      else
-	return REG_ECOLLATE;
+	  elem = seek_collating_symbol_entry (name, name_len);
+	  if (elem != -1)
+	    {
+	      /* We found the entry.  */
+	      idx = symb_table[2 * elem + 1];
+	      /* Skip the name of collating element name.  */
+	      idx += 1 + extra[idx];
+	    }
+	  else if (name_len == 1)
+	    {
+	      /* No valid character, treat it as a normal
+		 character.  */
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
+	  else
+	    return REG_ECOLLATE;
 
-      /* Got valid collation sequence, add it as a new entry.  */
-      /* Check the space of the arrays.  */
-      if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
-	{
-	  /* Not enough, realloc it.  */
-	  /* +1 in case of mbcset->ncoll_syms is 0.  */
-	  int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
-	  /* Use realloc since mbcset->coll_syms is NULL
-	     if *alloc == 0.  */
-	  int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
-					       new_coll_sym_alloc);
-          if (__glibc_unlikely (new_coll_syms == NULL))
-	    return REG_ESPACE;
-	  mbcset->coll_syms = new_coll_syms;
-	  *coll_sym_alloc = new_coll_sym_alloc;
+	  /* Got valid collation sequence, add it as a new entry.  */
+	  /* Check the space of the arrays.  */
+	  if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
+	    {
+	      /* Not enough, realloc it.  */
+	      /* +1 in case of mbcset->ncoll_syms is 0.  */
+	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+	      /* Use realloc since mbcset->coll_syms is NULL
+		 if *alloc == 0.  */
+	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+						   new_coll_sym_alloc);
+	      if (__glibc_unlikely (new_coll_syms == NULL))
+		return REG_ESPACE;
+	      mbcset->coll_syms = new_coll_syms;
+	      *coll_sym_alloc = new_coll_sym_alloc;
+	    }
+	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+	  return REG_NOERROR;
 	}
-      mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
-      return REG_NOERROR;
-    }
-  else
-    {
-      if (__glibc_unlikely (name_len != 1))
-	return REG_ECOLLATE;
       else
 	{
-	  bitset_set (sbcset, name[0]);
-	  return REG_NOERROR;
+	  if (__glibc_unlikely (name_len != 1))
+	    return REG_ECOLLATE;
+	  else
+	    {
+	      bitset_set (sbcset, name[0]);
+	      return REG_NOERROR;
+	    }
 	}
     }
-}
-#endif /* _LIBC */
-
-/* This function parse bracket expression like "[abc]", "[a-c]",
-   "[[.a-a.]]" etc.  */
-
-static bin_tree_t *
-parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
-		   reg_syntax_t syntax, reg_errcode_t *err)
-{
-#ifdef _LIBC
-  const unsigned char *collseqmb;
-  const char *collseqwc = NULL;
-  uint32_t nrules;
-  int32_t table_size = 0;
-  const int32_t *symb_table = NULL;
-  const unsigned char *extra = NULL;
 #endif
 
   re_token_t br_token;
@@ -3245,9 +3230,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 
 #ifdef _LIBC
 	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
-				  &start_elem, &end_elem,
-				  dfa, syntax, nrules, collseqmb, collseqwc,
-				  table_size, symb_table, extra);
+				  &start_elem, &end_elem);
 #else
 # ifdef RE_ENABLE_I18N
 	  *err = build_range_exp (syntax, sbcset,
@@ -3300,8 +3283,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 #ifdef RE_ENABLE_I18N
 					     mbcset, &coll_sym_alloc,
 #endif /* RE_ENABLE_I18N */
-					     start_elem.opr.name,
-					     nrules, table_size, symb_table, extra);
+					     start_elem.opr.name);
 	      if (__glibc_unlikely (*err != REG_NOERROR))
 		goto parse_bracket_exp_free_return;
 	      break;
-- 
2.32.0

Reply via email to