From 8c22765403cc34e87ad953cb3f6901e723c937f5 Mon Sep 17 00:00:00 2001
From: Jim Meyering <meyering@meta.com>
Date: Sun, 12 Apr 2026 10:27:03 -0700
Subject: [PATCH] regex: fix false match with backrefs and $ anchor

Use of sed or grep with both backreferences and an end-of-line
anchor could get false matches.  For example, this grep command
would falsely declare "ab" to be a palindrome:
  grep -E '^(.?)(.?).?\2\1$' <<< ab
In prune_impossible_nodes, falling back to a shorter match
skipped the halt-state context check, so $ was not verified.
* lib/regexec.c (prune_impossible_nodes): Also require
check_halt_state_context to succeed in the loop that searches
for an earlier halt state.
* m4/regex.m4: Bump serial to 82.  Reject any system regex
implementation with this bug.
* tests/test-regex.c (main): Add a test for this bug.
Reported by Ed Morton in https://bugs.gnu.org/68725
---
 ChangeLog          | 17 +++++++++++++++++
 lib/regexec.c      |  5 ++++-
 m4/regex.m4        | 21 ++++++++++++++++++++-
 tests/test-regex.c | 17 +++++++++++++++++
 4 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index eaaef305f6..ee53c3f675 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2026-04-12  Jim Meyering  <meyering@meta.com>
+
+	regex: fix false match with backrefs and $ anchor
+	Use of sed or grep with both backreferences and an end-of-line
+	anchor could get false matches.  For example, this grep command
+	would falsely declare "ab" to be a palindrome:
+	  grep -E '^(.?)(.?).?\2\1$' <<< ab
+	In prune_impossible_nodes, falling back to a shorter match
+	skipped the halt-state context check, so $ was not verified.
+	* lib/regexec.c (prune_impossible_nodes): Also require
+	check_halt_state_context to succeed in the loop that searches
+	for an earlier halt state.
+	* m4/regex.m4: Bump serial to 82.  Reject any system regex
+	implementation with this bug.
+	* tests/test-regex.c (main): Add a test for this bug.
+	Reported by Ed Morton in https://bugs.gnu.org/68725
+
 2026-04-11  Paul Eggert  <eggert@cs.ucla.edu>

 	tempname: call ‘clock’ only if !CLOCK_REALTIME
diff --git a/lib/regexec.c b/lib/regexec.c
index c84ce1ef33..e09fc7698e 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -943,7 +943,10 @@ prune_impossible_nodes (re_match_context_t *mctx)
 		  goto free_return;
 		}
 	    } while (mctx->state_log[match_last] == NULL
-		     || !mctx->state_log[match_last]->halt);
+		     || !mctx->state_log[match_last]->halt
+		     || !check_halt_state_context (mctx,
+						   mctx->state_log[match_last],
+						   match_last));
 	  halt_node = check_halt_state_context (mctx,
 						mctx->state_log[match_last],
 						match_last);
diff --git a/m4/regex.m4 b/m4/regex.m4
index 45a1049067..c36de81011 100644
--- a/m4/regex.m4
+++ b/m4/regex.m4
@@ -1,5 +1,5 @@
 # regex.m4
-# serial 81
+# serial 82
 dnl Copyright (C) 1996-2001, 2003-2026 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -318,6 +318,25 @@ AC_DEFUN([gl_REGEX],
                 free (regs.end);
               }

+            /* This test is derived from bug#68725, reported by Ed Morton.
+               The regex uses backrefs to detect palindromes and "ab"
+               is not a palindrome, so this should not match.  */
+            {
+              regex_t re68725;
+              i = regcomp (&re68725,
+                           "^(.?)(.?).?\\\\2\\\\1$",
+                           REG_EXTENDED);
+              if (i)
+                result |= 64;
+              else
+                {
+                  regmatch_t pm;
+                  if (regexec (&re68725, "ab", 1, &pm, 0) == 0)
+                    result |= 64;
+                  regfree (&re68725);
+                }
+            }
+
 #if 0
             /* It would be nice to reject hosts whose regoff_t values are too
                narrow (including glibc on hosts with 64-bit ptrdiff_t and
diff --git a/tests/test-regex.c b/tests/test-regex.c
index b139d7d3c0..87c03834f5 100644
--- a/tests/test-regex.c
+++ b/tests/test-regex.c
@@ -472,6 +472,23 @@ main (void)
   if (s && !streq (s, "Invalid back reference"))
     report_error ("%s: %s", pat_badback, s);

+  /* bug#68725, reported by Ed Morton.
+     The regex uses backrefs to detect palindromes and "ab"
+     is not a palindrome, so this should not match.  */
+  {
+    regex_t re68725;
+    int ret = regcomp (&re68725, "^(.?)(.?).?\\2\\1$", REG_EXTENDED);
+    if (ret)
+      report_error ("regcomp bug#68725 failed (%d)", ret);
+    else
+      {
+        regmatch_t pm;
+        if (regexec (&re68725, "ab", 1, &pm, 0) == 0)
+          report_error ("regexec bug#68725: \"ab\" matched, should not");
+        regfree (&re68725);
+      }
+  }
+
 #if 0
   /* It would be nice to reject hosts whose regoff_t values are too
      narrow (including glibc on hosts with 64-bit ptrdiff_t and
-- 
2.54.0.rc1.65.g8c9303b1ff

