Hi Bram!

On Di, 21 Dez 2010, Bram Moolenaar wrote:

> > This would keep compatibility. Alternatively, couldn't something
> > starting with a backslash be used, for example \{99}?
> 
> That's already used. 

Well, originally I only cared about more than 9 capturing groups in the
replacement part, as I seldom need more than 1 or 2 in the search string.
But I changed my patch to also allow backreferences to all groups in the
search string.

>  We could use \%99g, where "g" stands for group.

That doesn't look nice. Oh well. Here we go, an updated patch, including 
test and documentation.

regards,
Christian

-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt
--- a/runtime/doc/pattern.txt
+++ b/runtime/doc/pattern.txt
@@ -960,6 +960,18 @@
 	in the pattern (going left to right), NOT based on what is matched
 	first.
 
+\%1g    Matches the same string that was matched by the first sub-  */\%g*
+        expression in \( and \). Like \1, but this expression allows
+	more than 1 digit.
+   ...
+\%99g   Like \%1g, but this expression matches the 99th sub-expression
+        that was matched with \( and \).
+	Note: If there are fewer than 99 sub-expressions, \%99g will
+	replace the match with an empty string.
+	Note also, that the numbering of groups is done based on which
+	"\(" comes first in the pattern (going left to right), NOT based
+	on what is matched first.
+
 \%(\)	A pattern enclosed by escaped parentheses.	*/\%(\)* */\%(* *E53*
 	Just like \(\), but without counting it as a sub-expression.  This
 	allows using more groups and it's a little bit faster.
diff --git a/src/regexp.c b/src/regexp.c
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -313,6 +313,7 @@
 /* Obtain an operand that was stored as four bytes, MSB first. */
 #define OPERAND_MIN(p)	(((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
 			+ ((long)(p)[5] << 8) + (long)(p)[6])
+#define OPERAND_BYTE(p)	((int)(p)[3]) 
 /* Obtain a second operand stored as four bytes. */
 #define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
 /* Obtain a second single-byte operand stored after a four bytes operand. */
@@ -1116,8 +1117,10 @@
 	else if ((OP(scan) == BOW
 		    || OP(scan) == EOW
 		    || OP(scan) == NOTHING
-		    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
-		    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
+		    || (OP(scan) == MOPEN && OPERAND_BYTE(scan) == 0)
+		    || OP(scan) == NOPEN
+		    || (OP(scan) == MCLOSE && OPERAND_BYTE(scan) == 0)
+		    || OP(scan) == NCLOSE)
 		 && OP(regnext(scan)) == EXACTLY)
 	{
 #ifdef FEAT_MBYTE
@@ -1245,7 +1248,11 @@
 	    EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
 	parno = regnpar;
 	++regnpar;
-	ret = regnode(MOPEN + parno);
+	ret = regnode(MOPEN);
+	if (ret == JUST_CALC_SIZE)
+	  regsize++;
+	else
+	    *regcode++ = (char_u )parno;
     }
     else if (paren == REG_NPAREN)
     {
@@ -1286,8 +1293,12 @@
 #ifdef FEAT_SYN_HL
 	    paren == REG_ZPAREN ? ZCLOSE + parno :
 #endif
-	    paren == REG_PAREN ? MCLOSE + parno :
+	    paren == REG_PAREN ? MCLOSE :
 	    paren == REG_NPAREN ? NCLOSE : END);
+    if (ender == JUST_CALC_SIZE && paren == REG_PAREN)
+      regsize++;
+    else if (paren == REG_PAREN)
+	*regcode++ = (char_u )parno;
     regtail(ret, ender);
 
     /* Hook the tails of the branches to the closing node. */
@@ -1794,7 +1805,7 @@
       case Magic('8'):
       case Magic('9'):
 	    {
-		int		    refnum;
+		int	refnum;
 
 		refnum = c - Magic('0');
 		/*
@@ -1815,7 +1826,11 @@
 		    if (*p == NUL)
 			EMSG_RET_NULL(_("E65: Illegal back reference"));
 		}
-		ret = regnode(BACKREF + refnum);
+		ret = regnode(BACKREF);
+		if (ret == JUST_CALC_SIZE)
+		    regsize++;
+		else
+		    *regcode++ = (char_u )refnum;
 	    }
 	    break;
 
@@ -1851,10 +1866,18 @@
 			  break;
 #endif
 
-		case 's': ret = regnode(MOPEN + 0);
+		case 's': ret = regnode(MOPEN);
+			  if (ret == JUST_CALC_SIZE)
+			      regsize++;
+			  else
+			      *regcode++ = (char_u )0;
 			  break;
 
-		case 'e': ret = regnode(MCLOSE + 0);
+		case 'e': ret = regnode(MCLOSE);
+			  if (ret == JUST_CALC_SIZE)
+			      regsize++;
+			  else
+			      *regcode++ = (char_u )0;
 			  break;
 
 		default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
@@ -2020,6 +2043,24 @@
 				  }
 				  break;
 			      }
+			      else if (c == 'g' && n > 0)
+			      {
+				  if (!had_endbrace[n])
+				  {
+				      for (p = regparse; *p != NUL; ++p)
+					  if (p[0] == '@' && p[1] == '<'
+								&& (p[2] == '!' || p[2] == '='))
+					      break;
+				      if (*p == NUL)
+					  EMSG_RET_NULL(_("E65: Illegal back reference"));
+				  }
+				  ret = regnode(BACKREF);
+				  if (ret == JUST_CALC_SIZE)
+				      regsize++;
+				  else
+				      *regcode++ = (char_u )n;
+				  break;
+			      }
 			      else if (c == 'l' || c == 'c' || c == 'v')
 			      {
 				  if (c == 'l')
@@ -4456,18 +4497,9 @@
 	    }
 	    break;
 
-	  case MOPEN + 0:   /* Match start: \zs */
-	  case MOPEN + 1:   /* \( */
-	  case MOPEN + 2:
-	  case MOPEN + 3:
-	  case MOPEN + 4:
-	  case MOPEN + 5:
-	  case MOPEN + 6:
-	  case MOPEN + 7:
-	  case MOPEN + 8:
-	  case MOPEN + 9:
+	  case MOPEN:   /* Match start: \zs, \( */
 	    {
-		no = op - MOPEN;
+		no = OPERAND_BYTE(scan);
 		cleanup_subexpr();
 		rp = regstack_push(RS_MOPEN, scan);
 		if (rp == NULL)
@@ -4516,18 +4548,9 @@
 	    break;
 #endif
 
-	  case MCLOSE + 0:  /* Match end: \ze */
-	  case MCLOSE + 1:  /* \) */
-	  case MCLOSE + 2:
-	  case MCLOSE + 3:
-	  case MCLOSE + 4:
-	  case MCLOSE + 5:
-	  case MCLOSE + 6:
-	  case MCLOSE + 7:
-	  case MCLOSE + 8:
-	  case MCLOSE + 9:
+	  case MCLOSE:  /* Match end: \ze \) */
 	    {
-		no = op - MCLOSE;
+		no = OPERAND_BYTE(scan);
 		cleanup_subexpr();
 		rp = regstack_push(RS_MCLOSE, scan);
 		if (rp == NULL)
@@ -4568,22 +4591,14 @@
 	    break;
 #endif
 
-	  case BACKREF + 1:
-	  case BACKREF + 2:
-	  case BACKREF + 3:
-	  case BACKREF + 4:
-	  case BACKREF + 5:
-	  case BACKREF + 6:
-	  case BACKREF + 7:
-	  case BACKREF + 8:
-	  case BACKREF + 9:
+	  case BACKREF:
 	    {
 		int		len;
 		linenr_T	clnum;
 		colnr_T		ccol;
 		char_u		*p;
 
-		no = op - BACKREF;
+		no = OPERAND_BYTE(scan);
 		cleanup_subexpr();
 		if (!REG_MULTI)		/* Single-line regexp */
 		{
@@ -7062,9 +7077,36 @@
 		++src;
 		no = 0;
 	    }
+	    else if (*src != NUL && *src == '%')
+	    {
+		    int t = 0;
+		    int found_no = 0;
+		    src++;
+		    while (*src != NUL && (('0' <= *src && *src <= '9')
+			|| (*src == 'g')))
+		    {
+			if (*src != NUL && '0' <= *src && *src <= '9')
+			{
+			    t = 10*t + *src - '0' ;
+			    ++src;
+			    if (!found_no)
+				found_no = TRUE;
+			}
+			else
+			    break;
+		    }
+		    if (*src != NUL && *src == 'g' && found_no)
+		    {
+			no = ( t == 0 ? no : t);
+			++src;
+		    }
+		    else
+			EMSG(_("E65: Illegal back reference"));
+	    }
 	    else if ('0' <= *src && *src <= '9')
 	    {
-		no = *src++ - '0';
+		no = *src - '0';
+		++src;
 	    }
 	    else if (vim_strchr((char_u *)"uUlLeE", *src))
 	    {
diff --git a/src/regexp.h b/src/regexp.h
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -19,7 +19,7 @@
  * The second one (index 1) is the first sub-match, referenced with "\1".
  * This goes up to the tenth (index 9), referenced with "\9".
  */
-#define NSUBEXP  10
+#define NSUBEXP  100
 
 /*
  * Structure returned by vim_regcomp() to pass on to vim_regexec().
diff --git a/src/testdir/Make_amiga.mak b/src/testdir/Make_amiga.mak
--- a/src/testdir/Make_amiga.mak
+++ b/src/testdir/Make_amiga.mak
@@ -28,7 +28,7 @@
 		test61.out test62.out test63.out test64.out test65.out \
 		test66.out test67.out test68.out test69.out test70.out \
 		test71.out test72.out test73.out test74.out test75.out \
-		test76.out
+		test76.out test77.out
 
 .SUFFIXES: .in .out
 
diff --git a/src/testdir/Make_dos.mak b/src/testdir/Make_dos.mak
--- a/src/testdir/Make_dos.mak
+++ b/src/testdir/Make_dos.mak
@@ -28,7 +28,7 @@
 		test37.out test38.out test39.out test40.out test41.out \
 		test42.out test52.out test65.out test66.out test67.out \
 		test68.out test69.out test71.out test72.out test73.out \
-		test74.out test75.out test76.out
+		test74.out test75.out test76.out test77.out
 
 SCRIPTS32 =	test50.out test70.out
 
diff --git a/src/testdir/Make_ming.mak b/src/testdir/Make_ming.mak
--- a/src/testdir/Make_ming.mak
+++ b/src/testdir/Make_ming.mak
@@ -48,7 +48,7 @@
 		test37.out test38.out test39.out test40.out test41.out \
 		test42.out test52.out test65.out test66.out test67.out \
 		test68.out test69.out test71.out test72.out test73.out \
-		test74.out test75.out test76.out
+		test74.out test75.out test76.out test77.out
 
 SCRIPTS32 =	test50.out test70.out
 
diff --git a/src/testdir/Make_os2.mak b/src/testdir/Make_os2.mak
--- a/src/testdir/Make_os2.mak
+++ b/src/testdir/Make_os2.mak
@@ -28,7 +28,7 @@
 		test61.out test62.out test63.out test64.out test65.out \
 		test66.out test67.out test68.out test69.out test70.out \
 		test71.out test72.out test73.out test74.out test75.out \
-		test76.out
+		test76.out test77.out
 
 .SUFFIXES: .in .out
 
diff --git a/src/testdir/Make_vms.mms b/src/testdir/Make_vms.mms
--- a/src/testdir/Make_vms.mms
+++ b/src/testdir/Make_vms.mms
@@ -74,7 +74,8 @@
 	 test56.out test57.out test60.out \
 	 test61.out test62.out test63.out test64.out test65.out \
 	 test66.out test67.out test68.out test69.out \
-	 test71.out test72.out test74.out test75.out test76.out
+	 test71.out test72.out test74.out test75.out test76.out \
+	 test77.out
 
 # Known problems:
 # Test 30: a problem around mac format - unknown reason
diff --git a/src/testdir/Makefile b/src/testdir/Makefile
--- a/src/testdir/Makefile
+++ b/src/testdir/Makefile
@@ -25,7 +25,7 @@
 		test59.out test60.out test61.out test62.out test63.out \
 		test64.out test65.out test66.out test67.out test68.out \
 		test69.out test70.out test71.out test72.out test73.out \
-		test74.out test75.out test76.out
+		test74.out test75.out test76.out test77.out
 
 SCRIPTS_GUI = test16.out
 
diff --git a/src/testdir/test77.in b/src/testdir/test77.in
new file mode 100644
--- /dev/null
+++ b/src/testdir/test77.in
@@ -0,0 +1,30 @@
+
+Test substitution with more than 10 capturing groups
+
+STARTTEST
+/^start-here
+:" old style
+j:s/\(.\)\(.\)\(.\)\(.\)/\4\3\2\1$/
+: " more than 10 capturing groups
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g
+: " test \zs
+j:s/.*\zs\(4\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g
+: " test flag g
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%7g\%6g\%5g\%4g\%3g\%2g\%1g/g
+: " test \ze
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\ze/\%7g\%6g\%5g\%4g\%3g\%2g\%1g/g
+: " test if \11 is replaced by group 1 and a literal 1 has to be added
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g\11
+: " backreference within the search pattern
+j:s/^\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\%12g/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g\%12g
+:/^start-here/+1,$wq! test.out
+ENDTEST
+
+start-here
+abcd
+01234567890abcd
+01234567890abcd
+01234567890abcd
+01234567890abcd
+01234567890abcd
+01234567890abcda
diff --git a/src/testdir/test77.ok b/src/testdir/test77.ok
new file mode 100644
--- /dev/null
+++ b/src/testdir/test77.ok
@@ -0,0 +1,7 @@
+dcba$
+dcba09876543210
+0123dcba0987654
+6543210cba0987d
+6543210cba0987d
+dcba0987654321001
+dcba09876543210a

Raspunde prin e-mail lui