Hi Bram! On Di, 21 Dez 2010, Bram Moolenaar wrote:
> > This would keep compatibility. Alternatively, couldn't something > > starting with a backslash be used, for example \{99}? > > That's already used. Well, originally I only cared about more than 9 capturing groups in the replacement part, as I seldom need more than 1 or 2 in the search string. But I changed my patch to also allow backreferences of all groups in the search string. > We could use \%99g, where "g" stands for group. That doesn't look nice. Oh well. Here we go, an updated patch, including test and documentation. regards, Christian -- You received this message from the "vim_dev" maillist. Do not top-post! Type your reply below the text you are replying to. For more information, visit http://www.vim.org/maillist.php
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -960,6 +960,18 @@ in the pattern (going left to right), NOT based on what is matched first. +\%1g Matches the same string, that matched with the first sub- */\%g* + expression, \( and \). Like \1, but this expression allows + more than 1 digit. + ... +\%99g Like \%1g, but this expression matches the 99th sub-expression + that was matched with \( and \). + Note: If there are no 99 subexpressions, \%99g will replace + the match with an empty string. + Note also, that the numbering of groups is done based on which + "\(" comes first in the pattern (going left to right), NOT based + on what is matched first. + \%(\) A pattern enclosed by escaped parentheses. */\%(\)* */\%(* *E53* Just like \(\), but without counting it as a sub-expression. This allows using more groups and it's a little bit faster. diff --git a/src/regexp.c b/src/regexp.c --- a/src/regexp.c +++ b/src/regexp.c @@ -313,6 +313,7 @@ /* Obtain an operand that was stored as four bytes, MSB first. */ #define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \ + ((long)(p)[5] << 8) + (long)(p)[6]) +#define OPERAND_BYTE(p) ((int)(p)[3]) /* Obtain a second operand stored as four bytes. */ #define OPERAND_MAX(p) OPERAND_MIN((p) + 4) /* Obtain a second single-byte operand stored after a four bytes operand. 
*/ @@ -1116,8 +1117,10 @@ else if ((OP(scan) == BOW || OP(scan) == EOW || OP(scan) == NOTHING - || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN - || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) + || (OP(scan) == MOPEN && OPERAND_BYTE(scan) == 0) + || OP(scan) == NOPEN + || (OP(scan) == MCLOSE && OPERAND_BYTE(scan) == 0) + || OP(scan) == NCLOSE) && OP(regnext(scan)) == EXACTLY) { #ifdef FEAT_MBYTE @@ -1245,7 +1248,11 @@ EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); parno = regnpar; ++regnpar; - ret = regnode(MOPEN + parno); + ret = regnode(MOPEN); + if (ret == JUST_CALC_SIZE) + regsize++; + else + *regcode++ = (char_u )parno; } else if (paren == REG_NPAREN) { @@ -1286,8 +1293,12 @@ #ifdef FEAT_SYN_HL paren == REG_ZPAREN ? ZCLOSE + parno : #endif - paren == REG_PAREN ? MCLOSE + parno : + paren == REG_PAREN ? MCLOSE : paren == REG_NPAREN ? NCLOSE : END); + if (ender == JUST_CALC_SIZE && paren == REG_PAREN) + regsize++; + else if (paren == REG_PAREN) + *regcode++ = (char_u )parno; regtail(ret, ender); /* Hook the tails of the branches to the closing node. 
*/ @@ -1794,7 +1805,7 @@ case Magic('8'): case Magic('9'): { - int refnum; + int refnum; refnum = c - Magic('0'); /* @@ -1815,7 +1826,11 @@ if (*p == NUL) EMSG_RET_NULL(_("E65: Illegal back reference")); } - ret = regnode(BACKREF + refnum); + ret = regnode(BACKREF); + if (ret == JUST_CALC_SIZE) + regsize++; + else + *regcode++ = (char_u )refnum; } break; @@ -1851,10 +1866,18 @@ break; #endif - case 's': ret = regnode(MOPEN + 0); + case 's': ret = regnode(MOPEN); + if (ret == JUST_CALC_SIZE) + regsize++; + else + *regcode++ = (char_u )0; break; - case 'e': ret = regnode(MCLOSE + 0); + case 'e': ret = regnode(MCLOSE); + if (ret == JUST_CALC_SIZE) + regsize++; + else + *regcode++ = (char_u )0; break; default: EMSG_RET_NULL(_("E68: Invalid character after \\z")); @@ -2020,6 +2043,24 @@ } break; } + else if (c == 'g' && n > 0) + { + if (!had_endbrace[n]) + { + for (p = regparse; *p != NUL; ++p) + if (p[0] == '@' && p[1] == '<' + && (p[2] == '!' || p[2] == '=')) + break; + if (*p == NUL) + EMSG_RET_NULL(_("E65: Illegal back reference")); + } + ret = regnode(BACKREF); + if (ret == JUST_CALC_SIZE) + regsize++; + else + *regcode++ = (char_u )n; + break; + } else if (c == 'l' || c == 'c' || c == 'v') { if (c == 'l') @@ -4456,18 +4497,9 @@ } break; - case MOPEN + 0: /* Match start: \zs */ - case MOPEN + 1: /* \( */ - case MOPEN + 2: - case MOPEN + 3: - case MOPEN + 4: - case MOPEN + 5: - case MOPEN + 6: - case MOPEN + 7: - case MOPEN + 8: - case MOPEN + 9: + case MOPEN: /* Match start: \zs, \( */ { - no = op - MOPEN; + no = OPERAND_BYTE(scan); cleanup_subexpr(); rp = regstack_push(RS_MOPEN, scan); if (rp == NULL) @@ -4516,18 +4548,9 @@ break; #endif - case MCLOSE + 0: /* Match end: \ze */ - case MCLOSE + 1: /* \) */ - case MCLOSE + 2: - case MCLOSE + 3: - case MCLOSE + 4: - case MCLOSE + 5: - case MCLOSE + 6: - case MCLOSE + 7: - case MCLOSE + 8: - case MCLOSE + 9: + case MCLOSE: /* Match end: \ze \) */ { - no = op - MCLOSE; + no = OPERAND_BYTE(scan); cleanup_subexpr(); rp = 
regstack_push(RS_MCLOSE, scan); if (rp == NULL) @@ -4568,22 +4591,14 @@ break; #endif - case BACKREF + 1: - case BACKREF + 2: - case BACKREF + 3: - case BACKREF + 4: - case BACKREF + 5: - case BACKREF + 6: - case BACKREF + 7: - case BACKREF + 8: - case BACKREF + 9: + case BACKREF: { int len; linenr_T clnum; colnr_T ccol; char_u *p; - no = op - BACKREF; + no = OPERAND_BYTE(scan); cleanup_subexpr(); if (!REG_MULTI) /* Single-line regexp */ { @@ -7062,9 +7077,36 @@ ++src; no = 0; } + else if (*src != NUL && *src == '%') + { + int t = 0; + int found_no = 0; + src++; + while (*src != NUL && (('0' <= *src && *src <= '9') + || (*src == 'g'))) + { + if (*src != NUL && '0' <= *src && *src <= '9') + { + t = 10*t + *src - '0' ; + ++src; + if (!found_no) + found_no = TRUE; + } + else + break; + } + if (*src != NUL && *src == 'g' && found_no) + { + no = ( t == 0 ? no : t); + ++src; + } + else + EMSG(_("E65: Illegal back reference")); + } else if ('0' <= *src && *src <= '9') { - no = *src++ - '0'; + no = *src - '0'; + ++src; } else if (vim_strchr((char_u *)"uUlLeE", *src)) { diff --git a/src/regexp.h b/src/regexp.h --- a/src/regexp.h +++ b/src/regexp.h @@ -19,7 +19,7 @@ * The second one (index 1) is the first sub-match, referenced with "\1". * This goes up to the tenth (index 9), referenced with "\9". */ -#define NSUBEXP 10 +#define NSUBEXP 100 /* * Structure returned by vim_regcomp() to pass on to vim_regexec(). 
diff --git a/src/testdir/Make_amiga.mak b/src/testdir/Make_amiga.mak --- a/src/testdir/Make_amiga.mak +++ b/src/testdir/Make_amiga.mak @@ -28,7 +28,7 @@ test61.out test62.out test63.out test64.out test65.out \ test66.out test67.out test68.out test69.out test70.out \ test71.out test72.out test73.out test74.out test75.out \ - test76.out + test76.out test77.out .SUFFIXES: .in .out diff --git a/src/testdir/Make_dos.mak b/src/testdir/Make_dos.mak --- a/src/testdir/Make_dos.mak +++ b/src/testdir/Make_dos.mak @@ -28,7 +28,7 @@ test37.out test38.out test39.out test40.out test41.out \ test42.out test52.out test65.out test66.out test67.out \ test68.out test69.out test71.out test72.out test73.out \ - test74.out test75.out test76.out + test74.out test75.out test76.out test77.out SCRIPTS32 = test50.out test70.out diff --git a/src/testdir/Make_ming.mak b/src/testdir/Make_ming.mak --- a/src/testdir/Make_ming.mak +++ b/src/testdir/Make_ming.mak @@ -48,7 +48,7 @@ test37.out test38.out test39.out test40.out test41.out \ test42.out test52.out test65.out test66.out test67.out \ test68.out test69.out test71.out test72.out test73.out \ - test74.out test75.out test76.out + test74.out test75.out test76.out test77.out SCRIPTS32 = test50.out test70.out diff --git a/src/testdir/Make_os2.mak b/src/testdir/Make_os2.mak --- a/src/testdir/Make_os2.mak +++ b/src/testdir/Make_os2.mak @@ -28,7 +28,7 @@ test61.out test62.out test63.out test64.out test65.out \ test66.out test67.out test68.out test69.out test70.out \ test71.out test72.out test73.out test74.out test75.out \ - test76.out + test76.out test77.out .SUFFIXES: .in .out diff --git a/src/testdir/Make_vms.mms b/src/testdir/Make_vms.mms --- a/src/testdir/Make_vms.mms +++ b/src/testdir/Make_vms.mms @@ -74,7 +74,8 @@ test56.out test57.out test60.out \ test61.out test62.out test63.out test64.out test65.out \ test66.out test67.out test68.out test69.out \ - test71.out test72.out test74.out test75.out test76.out + test71.out test72.out test74.out 
test75.out test76.out \ + test77.out # Known problems: # Test 30: a problem around mac format - unknown reason diff --git a/src/testdir/Makefile b/src/testdir/Makefile --- a/src/testdir/Makefile +++ b/src/testdir/Makefile @@ -25,7 +25,7 @@ test59.out test60.out test61.out test62.out test63.out \ test64.out test65.out test66.out test67.out test68.out \ test69.out test70.out test71.out test72.out test73.out \ - test74.out test75.out test76.out + test74.out test75.out test76.out test77.out SCRIPTS_GUI = test16.out diff --git a/src/testdir/test77.in b/src/testdir/test77.in new file mode 100644 --- /dev/null +++ b/src/testdir/test77.in @@ -0,0 +1,30 @@ + +Test substitution with more than 10 capturing groups + +STARTTEST +/^start-here +:" old style +j:s/\(.\)\(.\)\(.\)\(.\)/\4\3\2\1$/ +: " more than 10 capturing groups +j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g +: " test \zs +j:s/.*\zs\(4\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g +: " test flag g +j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%7g\%6g\%5g\%4g\%3g\%2g\%1g/g +: " test \ze +j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\ze/\%7g\%6g\%5g\%4g\%3g\%2g\%1g/g +: " test if \11 is replaced by group 1 and a literal 1 has to be added +j:s/\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g\11 +: " backreference within the search pattern +j:s/^\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\%12g/\%15g\%14g\%13g\%12g\%11g\%10g\%9g\%8g\%7g\%6g\%5g\%4g\%3g\%2g\%1g\%12g +:/^start-here/+1,$wq! 
test.out +ENDTEST + +start-here +abcd +01234567890abcd +01234567890abcd +01234567890abcd +01234567890abcd +01234567890abcd +01234567890abcda diff --git a/src/testdir/test77.ok b/src/testdir/test77.ok new file mode 100644 --- /dev/null +++ b/src/testdir/test77.ok @@ -0,0 +1,7 @@ +dcba$ +dcba09876543210 +0123dcba0987654 +6543210cba0987d +6543210cba0987d +dcba0987654321001 +dcba09876543210a