Hi Vadim,
> Yes, its almost time :-) Can you review the patch attached to the 24234,
> can it be applied verbatim, or should some changes be done to it?
I've found some problems with suggested fix:
It has some problems with handling escaped \, e.g. it's impossible to
write string so it produces one \ followed by some back-referenced part
(i.e. "\\\$0" will produce "\\<some_text>", not "\<some text>"
Also the patch introduces incompatibility.
To fix these problems I've added new REPLACE_WITH_ESCAPES constant and
rewritten subst() so it consistently handles escaped characters (also
I've refactored subst() a little).
So, now is your turn to review the fix ;)
Unified diffs are attached.
With best regards, Oleg.
Index: src/java/org/apache/regexp/RE.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RE.java,v
retrieving revision 1.21
diff -u -r1.21 RE.java
--- src/java/org/apache/regexp/RE.java 27 Feb 2004 02:41:20 -0000 1.21
+++ src/java/org/apache/regexp/RE.java 29 Feb 2004 06:16:10 -0000
@@ -1591,6 +1591,12 @@
public static final int REPLACE_BACKREFERENCES = 0x0002;
/**
+ * Flag bit that indicates that subst should consider \ in substtitution
+ * string as escape symbol.
+ */
+ public static final int REPLACE_WITH_ESCAPES = 0x0004;
+
+ /**
* Substitutes a string for this regular expression in another string.
* This method works like the Perl function of the same name.
* Given a regular expression of "a*b", a String to substituteIn of
@@ -1645,6 +1651,7 @@
// Start at position 0 and search the whole string
int pos = 0;
int len = substituteIn.length();
+ Vector substParts = parseSubstString(substitution, flags);
// Try a match at each position
while (pos < len && match(substituteIn, pos))
@@ -1652,54 +1659,8 @@
// Append string before match
ret.append(substituteIn.substring(pos, getParenStart(0)));
- if ((flags & REPLACE_BACKREFERENCES) != 0)
- {
- // Process backreferences
- int lCurrentPosition = 0;
- int lLastPosition = -2;
- int lLength = substitution.length();
- boolean bAddedPrefix = false;
-
- while ((lCurrentPosition = substitution.indexOf("$",
lCurrentPosition)) >= 0)
- {
- if ((lCurrentPosition == 0 ||
substitution.charAt(lCurrentPosition - 1) != '\\')
- && lCurrentPosition+1 < lLength)
- {
- char c = substitution.charAt(lCurrentPosition + 1);
- if (c >= '0' && c <= '9')
- {
- if (bAddedPrefix == false)
- {
- // Append everything between the beginning of the
- // substitution string and the current $ sign
- ret.append(substitution.substring(0,
lCurrentPosition));
- bAddedPrefix = true;
- }
- else
- {
- // Append everything between the last and the current
$ sign
- ret.append(substitution.substring(lLastPosition + 2,
lCurrentPosition));
- }
-
- // Append the parenthesized expression
- // Note: if a parenthesized expression of the requested
- // index is not available "null" is added to the string
- ret.append(getParen(c - '0'));
- lLastPosition = lCurrentPosition;
- }
- }
-
- // Move forward, skipping past match
- lCurrentPosition++;
- }
-
- // Append everything after the last $ sign
- ret.append(substitution.substring(lLastPosition + 2, lLength));
- }
- else
- {
- // Append substitution without processing backreferences
- ret.append(substitution);
+ for (int i = 0; i < substParts.size(); i++) {
+ ret.append(substParts.get(i).toString());
}
// Move forward, skipping past match
@@ -1774,5 +1735,71 @@
}
return false;
+ }
+
+ /**
+ * Parse given substitution string and returns vector of its parts
+ * (plain strings and BackRefSubstParts) which will be used in subst().
+ * @param substitution string to parse
+ * @param flags subst() flags
+ * @return vector of substitution string parts
+ */
+ private Vector parseSubstString(String substitution, int flags) {
+ final Vector parts = new Vector();
+ final boolean useBackrefs = ((flags & REPLACE_BACKREFERENCES) != 0);
+ final boolean useEscapes = ((flags & REPLACE_WITH_ESCAPES) != 0);
+ final int len = substitution.length();
+ StringBuffer part = new StringBuffer();
+ for (int i = 0; i < len; i++) {
+ final char curChar = substitution.charAt(i);
+ if (useEscapes && curChar == '\\' && (i + 1) < len)
+ {
+ // if we have escape char just add next character
+ part.append(substitution.charAt(++i));
+ }
+ else if (curChar == '\\' && (i + 1) < len
+ && substitution.charAt(i + 1) == '$')
+ {
+ // we do not use escaped characters, but next is $
+ // add both \ and $
+ part.append(curChar);
+ part.append(substitution.charAt(++i));
+ }
+ else if (!useBackrefs || curChar != '$' || (i + 1) == len )
+ {
+ // plain char
+ part.append(curChar);
+ }
+ else
+ {
+ // we have backref
+ // add previous substitution part
+ parts.add(part.toString());
+ // add backref subst part
+ char paren = substitution.charAt(++i);
+ parts.add(new BackRefSubstPart(paren - '0'));
+ // clear current part
+ part = new StringBuffer();
+ }
+ }
+ parts.add(part.toString());
+
+ return parts;
+ }
+
+ /**
+ * Represents backreference in substitution string.
+ * To get current string for this beckref just call toString()
+ */
+ private class BackRefSubstPart {
+ final int paren;
+
+ BackRefSubstPart(int paren) {
+ this.paren = paren;
+ }
+
+ public String toString() {
+ return RE.this.getParen(paren);
+ }
}
}
Index: src/java/org/apache/regexp/RETest.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
retrieving revision 1.13
diff -u -r1.13 RETest.java
--- src/java/org/apache/regexp/RETest.java 27 Feb 2004 02:41:20 -0000 1.13
+++ src/java/org/apache/regexp/RETest.java 29 Feb 2004 06:16:15 -0000
@@ -501,6 +501,29 @@
actual = r.subst("\r\na\r\n",
"b", RE.REPLACE_BACKREFERENCES);
assertEquals("Wrong subst() result", "\r\nb\r\n", actual);
+
+ r = new RE("\\.");
+ actual = r.subst(".", "\\$0", RE.REPLACE_BACKREFERENCES);
+ System.err.println(actual);
+ assertEquals("Wrong subst() result", "\\$0", actual);
+
+ // Test subst with backrefs and escaped \
+ r = new RE("[.?]");
+ actual = r.subst(".", "\\\\$0",
+ RE.REPLACE_BACKREFERENCES | RE.REPLACE_WITH_ESCAPES);
+ System.err.println(actual);
+ assertEquals("Wrong subst() result", "\\.", actual);
+
+ actual = r.subst(".", "\\\\\\$0",
+ RE.REPLACE_BACKREFERENCES | RE.REPLACE_WITH_ESCAPES);
+ System.err.println(actual);
+ assertEquals("Wrong subst() result", "\\$0", actual);
+
+ // Test subst() without backref but with $ sing
+ r = new RE("[.?]");
+ actual = r.subst(".", "$0");
+ System.err.println(actual);
+ assertEquals("Wrong subst() result", "$0", actual);
}
public void assertEquals(String message, String expected, String actual)
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]