Hi Bram!
On Di, 21 Dez 2010, Bram Moolenaar wrote:
> > This would keep compatibility. Alternatively, couldn't something
> > starting with a backslash be used, for example \{99}?
>
> That's already used.
Well originally, I only cared about more than 9 capturing groups in the
replacement part, as I seldom need more than 1 or 2 in the search string. But
I changed my patch, to also allow backreferences of all groups in the
search string.
> We could use \%99g, where "g" stands for group.
That doesn't look nice. Oh well. Here we go, an updated patch, including
test and documentation.
regards,
Christian
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt
--- a/runtime/doc/pattern.txt
+++ b/runtime/doc/pattern.txt
@@ -960,6 +960,18 @@
in the pattern (going left to right), NOT based on what is matched
first.
+\%1g Matches the same string, that matched with the first sub- */\%g*
+ expression, \( and \). Like \1, but this expression allows
+ more than 1 digit.
+ ...
+\%99g Like \%1g, but this expression matches the 99th sub-expression
+ that was matched with \( and \).
+ Note: If there are fewer than 99 sub-expressions, \%99g will replace
+ the match with an empty string.
+ Note also, that the numbering of groups is done based on which
+ "\(" comes first in the pattern (going left to right), NOT based
+ on what is matched first.
+
\%(\) A pattern enclosed by escaped parentheses. */\%(\)* */\%(* *E53*
Just like \(\), but without counting it as a sub-expression. This
allows using more groups and it's a little bit faster.
diff --git a/src/regexp.c b/src/regexp.c
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -313,6 +313,7 @@
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
+ ((long)(p)[5] << 8) + (long)(p)[6])
+#define OPERAND_BYTE(p) ((int)(p)[3])
/* Obtain a second operand stored as four bytes. */
#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
@@ -1116,8 +1117,10 @@
else if ((OP(scan) == BOW
|| OP(scan) == EOW
|| OP(scan) == NOTHING
- || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
- || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
+ || (OP(scan) == MOPEN && OPERAND_BYTE(scan) == 0)
+ || OP(scan) == NOPEN
+ || (OP(scan) == MCLOSE && OPERAND_BYTE(scan) == 0)
+ || OP(scan) == NCLOSE)
&& OP(regnext(scan)) == EXACTLY)
{
#ifdef FEAT_MBYTE
@@ -1245,7 +1248,11 @@
EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
parno = regnpar;
++regnpar;
- ret = regnode(MOPEN + parno);
+ ret = regnode(MOPEN);
+ if (ret == JUST_CALC_SIZE)
+ regsize++;
+ else
+ *regcode++ = (char_u )parno;
}
else if (paren == REG_NPAREN)
{
@@ -1286,8 +1293,12 @@
#ifdef FEAT_SYN_HL
paren == REG_ZPAREN ? ZCLOSE + parno :
#endif
- paren == REG_PAREN ? MCLOSE + parno :
+ paren == REG_PAREN ? MCLOSE :
paren == REG_NPAREN ? NCLOSE : END);
+ if (ender == JUST_CALC_SIZE && paren == REG_PAREN)
+ regsize++;
+ else if (paren == REG_PAREN)
+ *regcode++ = (char_u )parno;
regtail(ret, ender);
/* Hook the tails of the branches to the closing node. */
@@ -1794,7 +1805,7 @@
case Magic('8'):
case Magic('9'):
{
- int refnum;
+ int refnum;
refnum = c - Magic('0');
/*
@@ -1815,7 +1826,11 @@
if (*p == NUL)
EMSG_RET_NULL(_("E65: Illegal back reference"));
}
- ret = regnode(BACKREF + refnum);
+ ret = regnode(BACKREF);
+ if (ret == JUST_CALC_SIZE)
+ regsize++;
+ else
+ *regcode++ = (char_u )refnum;
}
break;
@@ -1851,10 +1866,18 @@
break;
#endif
- case 's': ret = regnode(MOPEN + 0);
+ case 's': ret = regnode(MOPEN);
+ if (ret == JUST_CALC_SIZE)
+ regsize++;
+ else
+ *regcode++ = (char_u )0;
break;
- case 'e': ret = regnode(MCLOSE + 0);
+ case 'e': ret = regnode(MCLOSE);
+ if (ret == JUST_CALC_SIZE)
+ regsize++;
+ else
+ *regcode++ = (char_u )0;
break;
default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
@@ -2020,6 +2043,24 @@
}
break;
}
+ else if (c == 'g' && n > 0)
+ {
+ if (!had_endbrace[n])
+ {
+ for (p = regparse; *p != NUL; ++p)
+ if (p[0] == '@' && p[1] == '<'
+ && (p[2] == '!' || p[2] == '='))
+ break;
+ if (*p == NUL)
+ EMSG_RET_NULL(_("E65: Illegal back reference"));
+ }
+ ret = regnode(BACKREF);
+ if (ret == JUST_CALC_SIZE)
+ regsize++;
+ else
+ *regcode++ = (char_u )n;
+ break;
+ }
else if (c == 'l' || c == 'c' || c == 'v')
{
if (c == 'l')
@@ -4456,18 +4497,9 @@
}
break;
- case MOPEN + 0: /* Match start: \zs */
- case MOPEN + 1: /* \( */
- case MOPEN + 2:
- case MOPEN + 3:
- case MOPEN + 4:
- case MOPEN + 5:
- case MOPEN + 6:
- case MOPEN + 7:
- case MOPEN + 8:
- case MOPEN + 9:
+ case MOPEN: /* Match start: \zs, \( */
{
- no = op - MOPEN;
+ no = OPERAND_BYTE(scan);
cleanup_subexpr();
rp = regstack_push(RS_MOPEN, scan);
if (rp == NULL)
@@ -4516,18 +4548,9 @@
break;
#endif
- case MCLOSE + 0: /* Match end: \ze */
- case MCLOSE + 1: /* \) */
- case MCLOSE + 2:
- case MCLOSE + 3:
- case MCLOSE + 4:
- case MCLOSE + 5:
- case MCLOSE + 6:
- case MCLOSE + 7:
- case MCLOSE + 8:
- case MCLOSE + 9:
+ case MCLOSE: /* Match end: \ze \) */
{
- no = op - MCLOSE;
+ no = OPERAND_BYTE(scan);
cleanup_subexpr();
rp = regstack_push(RS_MCLOSE, scan);
if (rp == NULL)
@@ -4568,22 +4591,14 @@
break;
#endif
- case BACKREF + 1:
- case BACKREF + 2:
- case BACKREF + 3:
- case BACKREF + 4:
- case BACKREF + 5:
- case BACKREF + 6:
- case BACKREF + 7:
- case BACKREF + 8:
- case BACKREF + 9:
+ case BACKREF:
{
int len;
linenr_T clnum;
colnr_T ccol;
char_u *p;
- no = op - BACKREF;
+ no = OPERAND_BYTE(scan);
cleanup_subexpr();
if (!REG_MULTI) /* Single-line regexp */
{
@@ -7062,9 +7077,36 @@
++src;
no = 0;
}
+ else if (*src != NUL && *src == '%')
+ {
+ int t = 0;
+ int found_no = 0;
+ src++;
+ while (*src != NUL && (('0' <= *src && *src <= '9')
+ || (*src == 'g')))
+ {
+ if (*src != NUL && '0' <= *src && *src <= '9')
+ {
+ t = 10*t + *src - '0' ;
+ ++src;
+ if (!found_no)
+ found_no = TRUE;
+ }
+ else
+ break;
+ }
+ if (*src != NUL && *src == 'g' && found_no)
+ {
+ no = ( t == 0 ? no : t);
+ ++src;
+ }
+ else
+ EMSG(_("E65: Illegal back reference"));
+ }
else if ('0' <= *src && *src <= '9')
{
- no = *src++ - '0';
+ no = *src - '0';
+ ++src;
}
else if (vim_strchr((char_u *)"uUlLeE", *src))
{
diff --git a/src/regexp.h b/src/regexp.h
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -19,7 +19,7 @@
* The second one (index 1) is the first sub-match, referenced with "\1".
* This goes up to the tenth (index 9), referenced with "\9".
*/
-#define NSUBEXP 10
+#define NSUBEXP 100
/*
* Structure returned by vim_regcomp() to pass on to vim_regexec().
diff --git a/src/testdir/Make_amiga.mak b/src/testdir/Make_amiga.mak
--- a/src/testdir/Make_amiga.mak
+++ b/src/testdir/Make_amiga.mak
@@ -28,7 +28,7 @@
test61.out test62.out test63.out test64.out test65.out \
test66.out test67.out test68.out test69.out test70.out \
test71.out test72.out test73.out test74.out test75.out \
- test76.out
+ test76.out test77.out
.SUFFIXES: .in .out
diff --git a/src/testdir/Make_dos.mak b/src/testdir/Make_dos.mak
--- a/src/testdir/Make_dos.mak
+++ b/src/testdir/Make_dos.mak
@@ -28,7 +28,7 @@
test37.out test38.out test39.out test40.out test41.out \
test42.out test52.out test65.out test66.out test67.out \
test68.out test69.out test71.out test72.out test73.out \
- test74.out test75.out test76.out
+ test74.out test75.out test76.out test77.out
SCRIPTS32 = test50.out test70.out
diff --git a/src/testdir/Make_ming.mak b/src/testdir/Make_ming.mak
--- a/src/testdir/Make_ming.mak
+++ b/src/testdir/Make_ming.mak
@@ -48,7 +48,7 @@
test37.out test38.out test39.out test40.out test41.out \
test42.out test52.out test65.out test66.out test67.out \
test68.out test69.out test71.out test72.out test73.out \
- test74.out test75.out test76.out
+ test74.out test75.out test76.out test77.out
SCRIPTS32 = test50.out test70.out
diff --git a/src/testdir/Make_os2.mak b/src/testdir/Make_os2.mak
--- a/src/testdir/Make_os2.mak
+++ b/src/testdir/Make_os2.mak
@@ -28,7 +28,7 @@
test61.out test62.out test63.out test64.out test65.out \
test66.out test67.out test68.out test69.out test70.out \
test71.out test72.out test73.out test74.out test75.out \
- test76.out
+ test76.out test77.out
.SUFFIXES: .in .out
diff --git a/src/testdir/Make_vms.mms b/src/testdir/Make_vms.mms
--- a/src/testdir/Make_vms.mms
+++ b/src/testdir/Make_vms.mms
@@ -74,7 +74,8 @@
test56.out test57.out test60.out \
test61.out test62.out test63.out test64.out test65.out \
test66.out test67.out test68.out test69.out \
- test71.out test72.out test74.out test75.out test76.out
+ test71.out test72.out test74.out test75.out test76.out \
+ test77.out
# Known problems:
# Test 30: a problem around mac format - unknown reason
diff --git a/src/testdir/Makefile b/src/testdir/Makefile
--- a/src/testdir/Makefile
+++ b/src/testdir/Makefile
@@ -25,7 +25,7 @@
test59.out test60.out test61.out test62.out test63.out \
test64.out test65.out test66.out test67.out test68.out \
test69.out test70.out test71.out test72.out test73.out \
- test74.out test75.out test76.out
+ test74.out test75.out test76.out test77.out
SCRIPTS_GUI = test16.out
diff --git a/src/testdir/test77.in b/src/testdir/test77.in
new file mode 100644
--- /dev/null
+++ b/src/testdir/test77.in
@@ -0,0 +1,30 @@
+
+Test substitution with more than 10 capturing groups
+
+STARTTEST
+/^start-here
+:" old style
+j:s/\(.\)\(.\)\(.\)\(.\)/\4\3\2\1$/
+: " more than 10 capturing groups
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g
+: " test \zs
+j:s/.*\zs\(4\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g
+: " test flag g
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%7g\%6g\%5g\%4g\%3g\%2g\%1g/g
+: " test \ze
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\ze/\%7g\%6g\%5g\%4g\%3g\%2g\%1g/g
+: " test if \11 is replaced by group 1 and a literal 1 has to be added
+j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g\11
+: " backreference within the search pattern
+j:s/^\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\%12g/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g\%12g
+:/^start-here/+1,$wq! test.out
+ENDTEST
+
+start-here
+abcd
+01234567890abcd
+01234567890abcd
+01234567890abcd
+01234567890abcd
+01234567890abcd
+01234567890abcda
diff --git a/src/testdir/test77.ok b/src/testdir/test77.ok
new file mode 100644
--- /dev/null
+++ b/src/testdir/test77.ok
@@ -0,0 +1,7 @@
+dcba$
+dcba09876543210
+0123dcba0987654
+6543210cba0987d
+6543210cba0987d
+dcba0987654321001
+dcba09876543210a