From: Ito Kazumitsu <[EMAIL PROTECTED]>
Date: Wed, 15 Mar 2006 07:07:36 +0900 (JST)

> This fix will make pass the following tests in
> gnu/testlet/java/util/regex/Pattern/testdata2:

I have made another fix so that the following tests pass.

/(?<=(foo)a)bar/
    fooabar
 0: bar
 1: foo
    *** Failers
No match
    bar
No match
    foobbar
No match

/(?<=(foo))bar\1/
    foobarfoo
 0: barfoo
 1: foo
    foobarfootling
 0: barfoo
 1: foo
    *** Failers
No match
    foobar
No match
    barfoo
No match

/(?>.*)(?<=(abcd|wxyz))/
    alphabetabcd
 0: alphabetabcd
 1: abcd
    endingwxyz
 0: endingwxyz
 1: wxyz
    *** Failers
No match
    a rather long string that doesn't end with one of them
No match

ChangeLog:
2006-03-16  Ito Kazumitsu  <[EMAIL PROTECTED]>

        * gnu/regexp/REMatch.java(matchedCharIndexed): New field,
        (start, end): Added comment about the negative values of them,
        (finish): Saves the input text in matchedCharIndexed,
        (toString): If the start or end index is out of bounds of the
        matched text, get the substring from matchedCharIndexed.
        Added special handling in case start/end index < -1.
        * gnu/regexp/RETokenLookAhead.java(matchThis): Return the newly
        found match, but keep the index as the original match.
        * gnu/regexp/RETokenLookBehind.java(matchThis): Return the newly
        found match, but keep the index as the original match.
        * gnu/regexp/RETokenBackRef.java(matchThis): Added special handling
        in case start/end index < -1.

Index: classpath/gnu/regexp/REMatch.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/REMatch.java,v
retrieving revision 1.9
diff -u -r1.9 REMatch.java
--- classpath/gnu/regexp/REMatch.java   11 Mar 2006 01:39:49 -0000      1.9
+++ classpath/gnu/regexp/REMatch.java   15 Mar 2006 15:14:14 -0000
@@ -49,6 +49,7 @@
  */
 public final class REMatch implements Serializable, Cloneable {
     private String matchedText;
+    private CharIndexed matchedCharIndexed;
 
     // These variables are package scope for fast access within the engine
     int eflags; // execution flags this match was made using
@@ -66,6 +67,10 @@
     int index; // used while matching to mark current match position in input
     int[] start; // start positions (relative to offset) for each (sub)exp.
     int[] end;   // end positions for the same
+    // start[i] == -1 or end[i] == -1 means that the start/end position is void.
+    // start[i] == p or end[i] == p where p < 0 and p != -1 means that
+    // the actual start/end position is (p+1). Start/end positions may
+    // become negative when the subexpression is in a RETokenLookBehind.
     boolean empty; // empty string matched. This flag is used only within
                   // RETokenRepeated.
 
@@ -106,6 +111,7 @@
        for (i = 0; i < end[0]; i++)
            sb.append(text.charAt(i));
        matchedText = sb.toString();
+       matchedCharIndexed = text;
        for (i = 0; i < start.length; i++) {
            // If any subexpressions didn't terminate, they don't count
            // TODO check if this code ever gets hit
@@ -181,7 +187,18 @@
        if ((sub >= start.length) || sub < 0)
            throw new IndexOutOfBoundsException("No group " + sub);
        if (start[sub] == -1) return null;
-       return (matchedText.substring(start[sub],end[sub]));
+       if (start[sub] >= 0 && end[sub] <= matchedText.length())
+           return (matchedText.substring(start[sub],end[sub]));
+       else {
+           StringBuffer sb = new StringBuffer();
+           int s = start[sub];
+           int e = end[sub];
+           if (s < 0) s += 1;
+           if (e < 0) e += 1;
+           for (int i = start[0] + s; i < start[0] + e; i++)
+               sb.append(matchedCharIndexed.charAt(i));
+           return sb.toString();
+       }
     }
     
     /** 
Index: classpath/gnu/regexp/RETokenBackRef.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenBackRef.java,v
retrieving revision 1.5
diff -u -r1.5 RETokenBackRef.java
--- classpath/gnu/regexp/RETokenBackRef.java    11 Mar 2006 01:39:49 -0000      1.5
+++ classpath/gnu/regexp/RETokenBackRef.java    15 Mar 2006 15:14:14 -0000
@@ -57,6 +57,8 @@
        b = mymatch.start[num];
        e = mymatch.end[num];
        if ((b==-1)||(e==-1)) return null; // this shouldn't happen, but...
+       if (b < 0) b += 1;
+       if (e < 0) e += 1;
        for (int i=b; i<e; i++) {
            char c1 = input.charAt(mymatch.index+i-b);
            char c2 = input.charAt(i);
Index: classpath/gnu/regexp/RETokenLookAhead.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenLookAhead.java,v
retrieving revision 1.4
diff -u -r1.4 RETokenLookAhead.java
--- classpath/gnu/regexp/RETokenLookAhead.java  11 Mar 2006 01:39:49 -0000      1.4
+++ classpath/gnu/regexp/RETokenLookAhead.java  15 Mar 2006 15:14:14 -0000
@@ -61,7 +61,8 @@
     REMatch trymatch = (REMatch)mymatch.clone();
     if (re.match(input, trymatch)) {
       if (negative) return null;
-      return mymatch;
+      trymatch.index = mymatch.index;
+      return trymatch;
     }
     else {
       if (negative) return mymatch;
Index: classpath/gnu/regexp/RETokenLookBehind.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenLookBehind.java,v
retrieving revision 1.2
diff -u -r1.2 RETokenLookBehind.java
--- classpath/gnu/regexp/RETokenLookBehind.java 11 Mar 2006 01:39:49 -0000      1.2
+++ classpath/gnu/regexp/RETokenLookBehind.java 15 Mar 2006 15:14:14 -0000
@@ -62,14 +62,24 @@
     REMatch trymatch = (REMatch)mymatch.clone();
     REMatch trymatch1 = (REMatch)mymatch.clone();
     REMatch newMatch = null;
-    int curIndex = trymatch.index + behind.length() - input.length();
+    int diff = behind.length() - input.length();
+    int curIndex = trymatch.index + diff;
     trymatch.index = 0;
     RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex);
     REToken re1 = (REToken) re.clone();
     re1.chain(stopper);
     if (re1.match(behind, trymatch)) {
       if (negative) return null;
-      return mymatch;
+      for (int i = 0; i < trymatch.start.length; i++) {
+         if (trymatch.start[i] != -1 && trymatch.end[i] != -1) {
+             trymatch.start[i] -= diff;
+             if (trymatch.start[i] < 0) trymatch.start[i] -= 1;
+             trymatch.end[i] -= diff;
+             if (trymatch.end[i] < 0) trymatch.end[i] -= 1;
+         }
+      }
+      trymatch.index = mymatch.index;
+      return trymatch;
     }
     else {
       if (negative) return mymatch;

Reply via email to