Hi Vadim,

> Yes, its almost time :-) Can you review the patch attached to the 24234,
> can it be applied verbatim, or should some changes be done to it?

I've found some problems with suggested fix:
It has some problems with handling escaped \, e.g. it's impossible to
write string so it produces one \ followed by some back-referenced part
(i.e. "\\\$0" will produce "\\<some_text>", not "\<some text>"
Also the patch introduces incompatibility.
To fix these problems I've added new REPLACE_WITH_ESCAPES constant and
rewritten subst() so it consistently handles escaped characters (also
I've refactored subst() a little).

So, now is your turn to review the fix ;)

Unified diffs are attached.

With best regards, Oleg.
Index: src/java/org/apache/regexp/RE.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RE.java,v
retrieving revision 1.21
diff -u -r1.21 RE.java
--- src/java/org/apache/regexp/RE.java  27 Feb 2004 02:41:20 -0000      1.21
+++ src/java/org/apache/regexp/RE.java  29 Feb 2004 06:16:10 -0000
@@ -1591,6 +1591,12 @@
     public static final int REPLACE_BACKREFERENCES = 0x0002;
 
     /**
+     * Flag bit that indicates that subst should consider \ in substtitution
+     * string as escape symbol.
+     */
+    public static final int REPLACE_WITH_ESCAPES = 0x0004;
+
+    /**
      * Substitutes a string for this regular expression in another string.
      * This method works like the Perl function of the same name.
      * Given a regular expression of "a*b", a String to substituteIn of
@@ -1645,6 +1651,7 @@
         // Start at position 0 and search the whole string
         int pos = 0;
         int len = substituteIn.length();
+        Vector substParts = parseSubstString(substitution, flags);
 
         // Try a match at each position
         while (pos < len && match(substituteIn, pos))
@@ -1652,54 +1659,8 @@
             // Append string before match
             ret.append(substituteIn.substring(pos, getParenStart(0)));
 
-            if ((flags & REPLACE_BACKREFERENCES) != 0)
-            {
-                // Process backreferences
-                int lCurrentPosition = 0;
-                int lLastPosition = -2;
-                int lLength = substitution.length();
-                boolean bAddedPrefix = false;
-
-                while ((lCurrentPosition = substitution.indexOf("$", 
lCurrentPosition)) >= 0)
-                {
-                    if ((lCurrentPosition == 0 || 
substitution.charAt(lCurrentPosition - 1) != '\\')
-                        && lCurrentPosition+1 < lLength)
-                    {
-                        char c = substitution.charAt(lCurrentPosition + 1);
-                        if (c >= '0' && c <= '9')
-                        {
-                            if (bAddedPrefix == false)
-                            {
-                                // Append everything between the beginning of the
-                                // substitution string and the current $ sign
-                                ret.append(substitution.substring(0, 
lCurrentPosition));
-                                bAddedPrefix = true;
-                            }
-                            else
-                            {
-                                // Append everything between the last and the current 
$ sign
-                                ret.append(substitution.substring(lLastPosition + 2, 
lCurrentPosition));
-                            }
-
-                            // Append the parenthesized expression
-                            // Note: if a parenthesized expression of the requested
-                            // index is not available "null" is added to the string
-                            ret.append(getParen(c - '0'));
-                            lLastPosition = lCurrentPosition;
-                        }
-                    }
-
-                    // Move forward, skipping past match
-                    lCurrentPosition++;
-                }
-
-                // Append everything after the last $ sign
-                ret.append(substitution.substring(lLastPosition + 2, lLength));
-            }
-            else
-            {
-                // Append substitution without processing backreferences
-                ret.append(substitution);
+            for (int i = 0; i < substParts.size(); i++) {
+                ret.append(substParts.get(i).toString());
             }
 
             // Move forward, skipping past match
@@ -1774,5 +1735,71 @@
         }
 
         return false;
+    }
+
+    /**
+     * Parse given substitution string and returns vector of its parts
+     * (plain strings and BackRefSubstParts) which will be used in subst().
+     * @param substitution string to parse
+     * @param flags subst() flags
+     * @return vector of substitution string parts
+     */
+    private Vector parseSubstString(String substitution, int flags) {
+        final Vector parts = new Vector();
+        final boolean useBackrefs = ((flags & REPLACE_BACKREFERENCES) != 0);
+        final boolean useEscapes = ((flags & REPLACE_WITH_ESCAPES) != 0);
+        final int len = substitution.length();
+        StringBuffer part = new StringBuffer();
+        for (int i = 0; i < len; i++) {
+            final char curChar = substitution.charAt(i);
+            if (useEscapes && curChar == '\\' && (i + 1) < len)
+            {
+                // if we have escape char just add next character
+                part.append(substitution.charAt(++i));
+            }
+            else if (curChar == '\\' && (i + 1) < len
+                     && substitution.charAt(i + 1) == '$')
+            {
+                // we do not use escaped characters, but next is $
+                // add both \ and $
+                part.append(curChar);
+                part.append(substitution.charAt(++i));
+            }
+            else if (!useBackrefs || curChar != '$' || (i + 1) == len )
+            {
+                // plain char
+                part.append(curChar);
+            }
+            else
+            {
+                // we have backref
+                // add previous substitution part
+                parts.add(part.toString());
+                // add backref subst part
+                char paren = substitution.charAt(++i);
+                parts.add(new BackRefSubstPart(paren - '0'));
+                // clear current part
+                part = new StringBuffer();
+            }
+        }
+        parts.add(part.toString());
+
+        return parts;
+    }
+
+    /**
+     * Represents backreference in substitution string.
+     * To get current string for this beckref just call toString()
+     */
+    private class BackRefSubstPart {
+        final int paren;
+
+        BackRefSubstPart(int paren) {
+            this.paren = paren;
+        }
+
+        public String toString() {
+            return RE.this.getParen(paren);
+        }
     }
 }
Index: src/java/org/apache/regexp/RETest.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
retrieving revision 1.13
diff -u -r1.13 RETest.java
--- src/java/org/apache/regexp/RETest.java      27 Feb 2004 02:41:20 -0000      1.13
+++ src/java/org/apache/regexp/RETest.java      29 Feb 2004 06:16:15 -0000
@@ -501,6 +501,29 @@
         actual = r.subst("\r\na\r\n",
                          "b", RE.REPLACE_BACKREFERENCES);
         assertEquals("Wrong subst() result", "\r\nb\r\n", actual);
+
+        r = new RE("\\.");
+        actual = r.subst(".", "\\$0", RE.REPLACE_BACKREFERENCES);
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "\\$0", actual);
+
+        // Test subst with backrefs and escaped \
+        r = new RE("[.?]");
+        actual = r.subst(".", "\\\\$0",
+                         RE.REPLACE_BACKREFERENCES | RE.REPLACE_WITH_ESCAPES);
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "\\.", actual);
+
+        actual = r.subst(".", "\\\\\\$0",
+                         RE.REPLACE_BACKREFERENCES | RE.REPLACE_WITH_ESCAPES);
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "\\$0", actual);
+
+        // Test subst() without backref but with $ sing
+        r = new RE("[.?]");
+        actual = r.subst(".", "$0");
+        System.err.println(actual);
+        assertEquals("Wrong subst() result", "$0", actual);
     }
 
     public void assertEquals(String message, String expected, String actual)
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to