From 0a1879892d21f7df1cb11e6c98c559609faf41a7 Mon Sep 17 00:00:00 2001
From: Mark Dilger <mark.dilger@enterprisedb.com>
Date: Sun, 22 Aug 2021 14:51:37 -0700
Subject: [PATCH v1] Distinguishing regular expression backref errors

To make debugging regular expressions easier, use a different error
message for backreferences within lookaround assertions than for
backreferences with no valid referent.  The error message "invalid
backreference number" was being used for both, which is confusing
when the backreference number is valid.
---
 src/backend/regex/regcomp.c                   |  2 +-
 src/include/regex/regerrs.h                   |  4 +++
 src/include/regex/regex.h                     | 29 ++++++++++---------
 .../test_regex/expected/test_regex.out        |  4 +--
 .../modules/test_regex/sql/test_regex.sql     |  2 +-
 src/test/regress/expected/regex.out           |  4 +--
 6 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index ae3a7b6a38..9a9b57e307 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1013,7 +1013,7 @@ parseqatom(struct vars *v,
 			/* postpone everything else pending possible {0} */
 			break;
 		case BACKREF:			/* the Feature From The Black Lagoon */
-			INSIST(type != LACON, REG_ESUBREG);
+			INSIST(type != LACON, REG_ENOBREF);
 			subno = v->nextvalue;
 			assert(subno > 0);
 			INSIST(subno < v->nsubs, REG_ESUBREG);
diff --git a/src/include/regex/regerrs.h b/src/include/regex/regerrs.h
index 41e25f7ff0..09c3f6f41c 100644
--- a/src/include/regex/regerrs.h
+++ b/src/include/regex/regerrs.h
@@ -30,6 +30,10 @@
 	REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"
 },
 
+{
+	REG_ENOBREF, "REG_ENOBREF", "backreference in lookaround assertion"
+},
+
 {
 	REG_EBRACK, "REG_EBRACK", "brackets [] not balanced"
 },
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index 0455ae8069..d38ef4ab6a 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -143,20 +143,21 @@ typedef struct
 #define REG_ECTYPE	 4			/* invalid character class */
 #define REG_EESCAPE  5			/* invalid escape \ sequence */
 #define REG_ESUBREG  6			/* invalid backreference number */
-#define REG_EBRACK	 7			/* brackets [] not balanced */
-#define REG_EPAREN	 8			/* parentheses () not balanced */
-#define REG_EBRACE	 9			/* braces {} not balanced */
-#define REG_BADBR	10			/* invalid repetition count(s) */
-#define REG_ERANGE	11			/* invalid character range */
-#define REG_ESPACE	12			/* out of memory */
-#define REG_BADRPT	13			/* quantifier operand invalid */
-#define REG_ASSERT	15			/* "can't happen" -- you found a bug */
-#define REG_INVARG	16			/* invalid argument to regex function */
-#define REG_MIXED	17			/* character widths of regex and string differ */
-#define REG_BADOPT	18			/* invalid embedded option */
-#define REG_ETOOBIG 19			/* regular expression is too complex */
-#define REG_ECOLORS 20			/* too many colors */
-#define REG_CANCEL	21			/* operation cancelled */
+#define REG_ENOBREF	 7			/* backreference in lookaround assertion */
+#define REG_EBRACK 	 8 			/* brackets [] not balanced */
+#define REG_EPAREN 	 9 			/* parentheses () not balanced */
+#define REG_EBRACE  10 			/* braces {} not balanced */
+#define REG_BADBR  	11 			/* invalid repetition count(s) */
+#define REG_ERANGE 	12 			/* invalid character range */
+#define REG_ESPACE 	13 			/* out of memory */
+#define REG_BADRPT 	15 			/* quantifier operand invalid */
+#define REG_ASSERT 	16 			/* "can't happen" -- you found a bug */
+#define REG_INVARG	17			/* invalid argument to regex function */
+#define REG_MIXED	18			/* character widths of regex and string differ */
+#define REG_BADOPT	19			/* invalid embedded option */
+#define REG_ETOOBIG 20			/* regular expression is too complex */
+#define REG_ECOLORS 21			/* too many colors */
+#define REG_CANCEL	22			/* operation cancelled */
 /* two specials for debugging and testing */
 #define REG_ATOI	101			/* convert error-code name to number */
 #define REG_ITOA	102			/* convert error-code number to name */
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 6242d0baa9..20cbd5108d 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -507,9 +507,9 @@ select * from test_regex('?', '', '-');
 ERROR:  invalid regular expression: quantifier operand invalid
 -- These two are not yet incorporated in Tcl, cf
 -- https://core.tcl-lang.org/tcl/tktview?name=5ea71fdcd3291c38
--- expectError	6.21 -		{x(\w)(?=(\1))}	ESUBREG
+-- expectError	6.21 -		{x(\w)(?=(\1))}	ENOBREF
 select * from test_regex('x(\w)(?=(\1))', '', '-');
-ERROR:  invalid regular expression: invalid backreference number
+ERROR:  invalid regular expression: backreference in lookaround assertion
 -- expectMatch	6.22 HP		{x(?=((foo)))}	xfoo	x
 select * from test_regex('x(?=((foo)))', 'xfoo', 'HP');
             test_regex             
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 389b8b61b3..bfaec8865f 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -172,7 +172,7 @@ select * from test_regex('?', '', '-');
 
 -- These two are not yet incorporated in Tcl, cf
 -- https://core.tcl-lang.org/tcl/tktview?name=5ea71fdcd3291c38
--- expectError	6.21 -		{x(\w)(?=(\1))}	ESUBREG
+-- expectError	6.21 -		{x(\w)(?=(\1))}	ENOBREF
 select * from test_regex('x(\w)(?=(\1))', '', '-');
 -- expectMatch	6.22 HP		{x(?=((foo)))}	xfoo	x
 select * from test_regex('x(?=((foo)))', 'xfoo', 'HP');
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index 86477cc506..7074d876ae 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -606,8 +606,8 @@ select regexp_match('foo', '(?:.|){99}');
 
 -- Error conditions
 select 'xyz' ~ 'x(\w)(?=\1)';  -- no backrefs in LACONs
-ERROR:  invalid regular expression: invalid backreference number
+ERROR:  invalid regular expression: backreference in lookaround assertion
 select 'xyz' ~ 'x(\w)(?=(\1))';
-ERROR:  invalid regular expression: invalid backreference number
+ERROR:  invalid regular expression: backreference in lookaround assertion
 select 'a' ~ '\x7fffffff';  -- invalid chr code
 ERROR:  invalid regular expression: invalid escape \ sequence
-- 
2.21.1 (Apple Git-122.3)

