On 7/14/21 8:51 AM, 林宏雄 wrote:
> We expect that configure should not use the included regex.c
> on the system with up-to-date glibc.

Recently that has not been the case, as glibc has been trailing behind Gnulib a bit. At some point I hope they will become more in sync.

> Returning 0 by re_compile_pattern() is correct behavior. It should not fail.

Thank you for reporting the problem. There is no formal spec for re_compile_pattern, and the regular expression is in some sense invalid and in another sense valid, so it's not clear which is the correct behavior here. However, Gnulib should allow the latest glibc behavior, as that behavior is arguably correct.

I installed the attached patches. The first patch merely fixes some longstanding quoting problems. The second patch updates the Gnulib tests to match the latest Gnulib, and to allow glibc implementations that behave as you describe. However, glibc 2.33 (the current version) still fails to pass the configure-time test, due to glibc bug 11053 which is fixed in Gnulib; see:

https://sourceware.org/bugzilla/show_bug.cgi?id=11053

From 11e6fc32e9c5c770150b47bc0260b1b91110eba0 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jul 2021 23:23:20 -0500
Subject: [PATCH 1/2] regex: fix shell quoting problem in configuration

* m4/regex.m4 (gl_REGEX): Fix quoting problems.
These C programs are put into unquoted here-documents,
so $ and \ need to be quoted.
---
 ChangeLog   | 7 +++++++
 m4/regex.m4 | 6 +++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 25d85aa4a..00d31cdc7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2021-07-14  Paul Eggert  <egg...@cs.ucla.edu>
+
+	regex: fix shell quoting problem in configuration
+	* m4/regex.m4 (gl_REGEX): Fix quoting problems.
+	These C programs are put into unquoted here-documents,
+	so $ and \ need to be quoted.
+
 2021-07-08  Paul Eggert  <egg...@cs.ucla.edu>
 
 	select: port better to MinGW
diff --git a/m4/regex.m4 b/m4/regex.m4
index 850c57222..0e1bafef2 100644
--- a/m4/regex.m4
+++ b/m4/regex.m4
@@ -1,4 +1,4 @@
-# serial 71
+# serial 72
 
 # Copyright (C) 1996-2001, 2003-2021 Free Software Foundation, Inc.
 #
@@ -246,7 +246,7 @@ AC_DEFUN([gl_REGEX],
                            & ~RE_CONTEXT_INVALID_DUP
                            & ~RE_NO_EMPTY_RANGES);
             memset (&regex, 0, sizeof regex);
-            s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
+            s = re_compile_pattern ("[[:alnum:]_-]\\\\+\$", 16, &regex);
             if (s)
               result |= 32;
             else
@@ -264,7 +264,7 @@ AC_DEFUN([gl_REGEX],
                back reference.  */
             re_set_syntax (RE_SYNTAX_POSIX_EGREP);
             memset (&regex, 0, sizeof regex);
-            s = re_compile_pattern ("0|()0|\\1|0", 10, &regex);
+            s = re_compile_pattern ("0|()0|\\\\1|0", 10, &regex);
             if (!s)
               result |= 64;
             else
-- 
2.25.1

From e707dbe7c6da9dd8300cb3d60141f144a7a5d5b1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jul 2021 23:55:30 -0500
Subject: [PATCH 2/2] regex: modernize to newer regex bugset

Problem reported by Hiroo Hayashi in:
https://lists.gnu.org/r/bug-gnulib/2021-07/msg00024.html
* m4/regex.m4 (gl_REGEX): Allow newer glibc behavior for ()0|\1,
behavior where the regex compiles but does not match.
Test for glibc bug 11053.
* tests/test-regex.c (bug_regex11, main): Add casts needed
for printf portability.
(main): Allow newer glibc behavior for ()0|\1.
---
 ChangeLog          | 10 ++++++++++
 m4/regex.m4        | 40 ++++++++++++++++++++++++++++++++++++++--
 tests/test-regex.c | 11 ++++++-----
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 00d31cdc7..78590feae 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
 2021-07-14  Paul Eggert  <egg...@cs.ucla.edu>
 
+	regex: modernize to newer regex bugset
+	Problem reported by Hiroo Hayashi in:
+	https://lists.gnu.org/r/bug-gnulib/2021-07/msg00024.html
+	* m4/regex.m4 (gl_REGEX): Allow newer glibc behavior for ()0|\1,
+	behavior where the regex compiles but does not match.
+	Test for glibc bug 11053.
+	* tests/test-regex.c (bug_regex11, main): Add casts needed
+	for printf portability.
+	(main): Allow newer glibc behavior for ()0|\1.
+
 	regex: fix shell quoting problem in configuration
 	* m4/regex.m4 (gl_REGEX): Fix quoting problems.
 	These C programs are put into unquoted here-documents,
diff --git a/m4/regex.m4 b/m4/regex.m4
index 0e1bafef2..1c7e562f6 100644
--- a/m4/regex.m4
+++ b/m4/regex.m4
@@ -1,4 +1,4 @@
-# serial 72
+# serial 73
 
 # Copyright (C) 1996-2001, 2003-2021 Free Software Foundation, Inc.
 #
@@ -266,12 +266,48 @@ AC_DEFUN([gl_REGEX],
             memset (&regex, 0, sizeof regex);
             s = re_compile_pattern ("0|()0|\\\\1|0", 10, &regex);
             if (!s)
-              result |= 64;
+              {
+                memset (&regs, 0, sizeof regs);
+                i = re_search (&regex, "x", 1, 0, 1, &regs);
+                if (i != -1)
+                  result |= 64;
+                if (0 <= i)
+                  {
+                    free (regs.start);
+                    free (regs.end);
+                  }
+                regfree (&regex);
+              }
             else
               {
                 if (strcmp (s, "Invalid back reference"))
                   result |= 64;
+              }
+
+            /* glibc bug 11053.  */
+            re_set_syntax (RE_SYNTAX_POSIX_BASIC);
+            memset (&regex, 0, sizeof regex);
+            static char const pat_sub2[] = "\\\\(a*\\\\)*a*\\\\1";
+            s = re_compile_pattern (pat_sub2, sizeof pat_sub2 - 1, &regex);
+            if (s)
+              result |= 64;
+            else
+              {
+                memset (&regs, 0, sizeof regs);
+                static char const data[] = "a";
+                int datalen = sizeof data - 1;
+                i = re_search (&regex, data, datalen, 0, datalen, &regs);
+                if (i != 0)
+                  result |= 64;
+                else if (regs.num_regs < 2)
+                  result |= 64;
+                else if (! (regs.start[0] == 0 && regs.end[0] == 1))
+                  result |= 64;
+                else if (! (regs.start[1] == 0 && regs.end[1] == 0))
+                  result |= 64;
                 regfree (&regex);
+                free (regs.start);
+                free (regs.end);
               }
 
 #if 0
diff --git a/tests/test-regex.c b/tests/test-regex.c
index 7ea73cfb6..ed4ca64c0 100644
--- a/tests/test-regex.c
+++ b/tests/test-regex.c
@@ -155,7 +155,8 @@ bug_regex11 (void)
 	    if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1)
 	      break;
 	    report_error ("%s: regexec %zd match failure rm[%d] %d..%d",
-                          tests[i].pattern, i, n, rm[n].rm_so, rm[n].rm_eo);
+                          tests[i].pattern, i, n,
+                          (int) rm[n].rm_so, (int) rm[n].rm_eo);
 	    break;
 	  }
 
@@ -433,7 +434,7 @@ main (void)
                       pat_sub2, data, (int) regs.start[0], (int) regs.end[0]);
       else if (! (regs.start[1] == 0 && regs.end[1] == 0))
         report_error ("re_search '%s' on '%s' returned wrong submatch [%d,%d)",
-                      pat_sub2, data, regs.start[1], regs.end[1]);
+                      pat_sub2, data, (int) regs.start[1], (int) regs.end[1]);
       regfree (&regex);
       free (regs.start);
       free (regs.end);
@@ -466,9 +467,9 @@ main (void)
   memset (&regex, 0, sizeof regex);
   static char const pat_badback[] = "0|()0|\\1|0";
   s = re_compile_pattern (pat_badback, sizeof pat_badback, &regex);
-  if (!s)
-    s = "failed to report invalid back reference";
-  if (strcmp (s, "Invalid back reference") != 0)
+  if (!s && re_search (&regex, "x", 1, 0, 1, &regs) != -1)
+    s = "mishandled invalid back reference";
+  if (s && strcmp (s, "Invalid back reference") != 0)
     report_error ("%s: %s", pat_badback, s);
 
 #if 0
-- 
2.25.1

Reply via email to