From: Ito Kazumitsu <[EMAIL PROTECTED]>
Date: Wed, 08 Feb 2006 00:01:46 +0900 (JST)
> I added support for backslash escape in a replacement as an
> optional feature for fear that some application may depend
> on the current behavior of gnu.regexp.
Reading Sun's API documentation, I found that the replacement
may contain not only $1-$9 but also $10, $11, $12, ...
2006-02-07 Ito Kazumitsu <[EMAIL PROTECTED]>
Fixes bug #26112
* gnu/regexp/RE.java(REG_REPLACE_USE_BACKSLASHESCAPE): New execution
flag which enables backslash escape in a replacement.
(getReplacement): New public static method.
(substituteImpl),(substituteAllImpl): Use getReplacement.
* gnu/regexp/REMatch.java(substituteInto): Replace $n even if n>=10.
* java/util/regex/Matcher.java(appendReplacement):
Use RE#getReplacement.
(replaceFirst),(replaceAll): Use RE.REG_REPLACE_USE_BACKSLASHESCAPE.
Index: classpath/gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.16
diff -u -r1.16 RE.java
--- classpath/gnu/regexp/RE.java 6 Feb 2006 14:03:59 -0000 1.16
+++ classpath/gnu/regexp/RE.java 8 Feb 2006 13:03:00 -0000
@@ -142,7 +142,7 @@
* Compilation flag. Do not differentiate case. Subsequent
* searches using this RE will be case insensitive.
*/
- public static final int REG_ICASE = 2;
+ public static final int REG_ICASE = 0x02;
/**
* Compilation flag. The match-any-character operator (dot)
@@ -150,14 +150,14 @@
* bit RE_DOT_NEWLINE (see RESyntax for details). This is equivalent to
* the "/s" operator in Perl.
*/
- public static final int REG_DOT_NEWLINE = 4;
+ public static final int REG_DOT_NEWLINE = 0x04;
/**
* Compilation flag. Use multiline mode. In this mode, the ^ and $
* anchors will match based on newlines within the input. This is
* equivalent to the "/m" operator in Perl.
*/
- public static final int REG_MULTILINE = 8;
+ public static final int REG_MULTILINE = 0x08;
/**
* Execution flag.
@@ -186,14 +186,14 @@
* // m4.toString(): "fool"<BR>
* </CODE>
*/
- public static final int REG_NOTBOL = 16;
+ public static final int REG_NOTBOL = 0x10;
/**
* Execution flag.
* The match-end operator ($) does not match at the end
* of the input string. Useful for matching on substrings.
*/
- public static final int REG_NOTEOL = 32;
+ public static final int REG_NOTEOL = 0x20;
/**
* Execution flag.
@@ -207,7 +207,7 @@
* the example under REG_NOTBOL. It also affects the use of the \<
* and \b operators.
*/
- public static final int REG_ANCHORINDEX = 64;
+ public static final int REG_ANCHORINDEX = 0x40;
/**
* Execution flag.
@@ -216,14 +216,24 @@
* the corresponding subexpressions. For example, you may want to
* replace all matches of "one dollar" with "$1".
*/
- public static final int REG_NO_INTERPOLATE = 128;
+ public static final int REG_NO_INTERPOLATE = 0x80;
/**
* Execution flag.
* Try to match the whole input string. An implicit match-end operator
* is added to this regexp.
*/
- public static final int REG_TRY_ENTIRE_MATCH = 256;
+ public static final int REG_TRY_ENTIRE_MATCH = 0x0100;
+
+ /**
+ * Execution flag.
+ * The substitute and substituteAll methods will treat the
+ * character '\' in the replacement as an escape to a literal
+ * character. In this case "\n", "\$", "\\", "\x40" and "\012"
+ * will become "n", "$", "\", "x40" and "012" respectively.
+ * This flag has no effect if REG_NO_INTERPOLATE is set on.
+ */
+ public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;
/** Returns a string representing the version of the gnu.regexp package. */
public static final String version() {
@@ -1614,8 +1624,7 @@
StringBuffer buffer = new StringBuffer();
REMatch m = getMatchImpl(input,index,eflags,buffer);
if (m==null) return buffer.toString();
- buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
- replace : m.substituteInto(replace) );
+ buffer.append(getReplacement(replace, m, eflags));
if (input.move(m.end[0])) {
do {
buffer.append(input.charAt(0));
@@ -1676,8 +1685,7 @@
StringBuffer buffer = new StringBuffer();
REMatch m;
while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
- buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
- replace : m.substituteInto(replace) );
+ buffer.append(getReplacement(replace, m, eflags));
index = m.getEndIndex();
if (m.end[0] == 0) {
char ch = input.charAt(0);
@@ -1692,6 +1700,39 @@
}
return buffer.toString();
}
+
+ public static String getReplacement(String replace, REMatch m, int eflags) {
+ if ((eflags & REG_NO_INTERPOLATE) > 0)
+ return replace;
+ else {
+ if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) {
+ StringBuffer sb = new StringBuffer();
+ int l = replace.length();
+ for (int i = 0; i < l; i++) {
+ char c = replace.charAt(i);
+ switch(c) {
+ case '\\':
+ i++;
+ // Let StringIndexOutOfBoundsException be thrown.
+ sb.append(replace.charAt(i));
+ break;
+ case '$':
+ int i1 = i + 1;
+ while (i1 < replace.length() &&
+ Character.isDigit(replace.charAt(i1))) i1++;
+ sb.append(m.substituteInto(replace.substring(i, i1)));
+ i = i1 - 1;
+ break;
+ default:
+ sb.append(c);
+ }
+ }
+ return sb.toString();
+ }
+ else
+ return m.substituteInto(replace);
+ }
+ }
/* Helper function for constructor */
private void addToken(REToken next) {
Index: classpath/gnu/regexp/REMatch.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/REMatch.java,v
retrieving revision 1.6
diff -u -r1.6 REMatch.java
--- classpath/gnu/regexp/REMatch.java 2 Feb 2006 15:16:59 -0000 1.6
+++ classpath/gnu/regexp/REMatch.java 8 Feb 2006 13:03:00 -0000
@@ -246,6 +246,8 @@
* <code>$0</code> through <code>$9</code>. <code>$0</code> matches
* the full substring matched; <code>$<i>n</i></code> matches
* subexpression number <i>n</i>.
+ * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
+ * if such subexpressions exist.
*
* @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
*/
@@ -256,6 +258,16 @@
for (pos = 0; pos < input.length()-1; pos++) {
if ((input.charAt(pos) == '$') &&
(Character.isDigit(input.charAt(pos+1)))) {
int val = Character.digit(input.charAt(++pos),10);
+ int pos1 = pos + 1;
+ while (pos1 < input.length() &&
+ Character.isDigit(input.charAt(pos1))) {
+ int val1 = val*10 + Character.digit(input.charAt(pos1),10);
+ if (val1 >= start.length) break;
+ pos1++;
+ val = val1;
+ }
+ pos = pos1 - 1;
+
if (val < start.length) {
output.append(toString(val));
}
Index: classpath/java/util/regex/Matcher.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/util/regex/Matcher.java,v
retrieving revision 1.12
diff -u -r1.12 Matcher.java
--- classpath/java/util/regex/Matcher.java 6 Feb 2006 14:24:17 -0000 1.12
+++ classpath/java/util/regex/Matcher.java 8 Feb 2006 13:03:00 -0000
@@ -75,7 +75,8 @@
assertMatchOp();
sb.append(input.subSequence(appendPosition,
match.getStartIndex()).toString());
- sb.append(match.substituteInto(replacement));
+ sb.append(RE.getReplacement(replacement, match,
+ RE.REG_REPLACE_USE_BACKSLASHESCAPE));
appendPosition = match.getEndIndex();
return this;
}
@@ -190,7 +191,8 @@
{
reset();
// Semantics might not quite match
- return pattern.getRE().substitute(input, replacement, position);
+ return pattern.getRE().substitute(input, replacement, position,
+ RE.REG_REPLACE_USE_BACKSLASHESCAPE);
}
/**
@@ -199,7 +201,8 @@
public String replaceAll (String replacement)
{
reset();
- return pattern.getRE().substituteAll(input, replacement, position);
+ return pattern.getRE().substituteAll(input, replacement, position,
+ RE.REG_REPLACE_USE_BACKSLASHESCAPE);
}
public int groupCount ()