From: Ito Kazumitsu <[EMAIL PROTECTED]> Date: Wed, 15 Mar 2006 07:07:36 +0900 (JST)
> This fix will make pass the following tests in > gnu/testlet/java/util/regex/Pattern/testdata2: I have made another fix so that the following tests also pass. /(?<=(foo)a)bar/ fooabar 0: bar 1: foo *** Failers No match bar No match foobbar No match /(?<=(foo))bar\1/ foobarfoo 0: barfoo 1: foo foobarfootling 0: barfoo 1: foo *** Failers No match foobar No match barfoo No match /(?>.*)(?<=(abcd|wxyz))/ alphabetabcd 0: alphabetabcd 1: abcd endingwxyz 0: endingwxyz 1: wxyz *** Failers No match a rather long string that doesn't end with one of them No match ChangeLog: 2006-03-16 Ito Kazumitsu <[EMAIL PROTECTED]> * gnu/regexp/REMatch.java(matchedCharIndexed): New field, (start, end): Added a comment about their negative values, (finish): Saves the input text in matchedCharIndexed, (toString): If the start or end index is out of bounds of the matched text, get the substring from matchedCharIndexed. Added special handling for the case where the start/end index < -1. * gnu/regexp/RETokenLookAhead.java(matchThis): Return the newly found match, but keep the index of the original match. * gnu/regexp/RETokenLookBehind.java(matchThis): Return the newly found match, but keep the index of the original match. * gnu/regexp/RETokenBackRef.java(matchThis): Added special handling for the case where the start/end index < -1.
Index: classpath/gnu/regexp/REMatch.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/regexp/REMatch.java,v retrieving revision 1.9 diff -u -r1.9 REMatch.java --- classpath/gnu/regexp/REMatch.java 11 Mar 2006 01:39:49 -0000 1.9 +++ classpath/gnu/regexp/REMatch.java 15 Mar 2006 15:14:14 -0000 @@ -49,6 +49,7 @@ */ public final class REMatch implements Serializable, Cloneable { private String matchedText; + private CharIndexed matchedCharIndexed; // These variables are package scope for fast access within the engine int eflags; // execution flags this match was made using @@ -66,6 +67,10 @@ int index; // used while matching to mark current match position in input int[] start; // start positions (relative to offset) for each (sub)exp. int[] end; // end positions for the same + // start[i] == -1 or end[i] == -1 means that the start/end position is void. + // start[i] == p or end[i] == p where p < 0 and p != -1 means that + // the actual start/end position is (p+1). Start/end positions may + // become negative when the subexpression is in a RETokenLookBehind. boolean empty; // empty string matched. This flag is used only within // RETokenRepeated. 
@@ -106,6 +111,7 @@ for (i = 0; i < end[0]; i++) sb.append(text.charAt(i)); matchedText = sb.toString(); + matchedCharIndexed = text; for (i = 0; i < start.length; i++) { // If any subexpressions didn't terminate, they don't count // TODO check if this code ever gets hit @@ -181,7 +187,18 @@ if ((sub >= start.length) || sub < 0) throw new IndexOutOfBoundsException("No group " + sub); if (start[sub] == -1) return null; - return (matchedText.substring(start[sub],end[sub])); + if (start[sub] >= 0 && end[sub] <= matchedText.length()) + return (matchedText.substring(start[sub],end[sub])); + else { + StringBuffer sb = new StringBuffer(); + int s = start[sub]; + int e = end[sub]; + if (s < 0) s += 1; + if (e < 0) e += 1; + for (int i = start[0] + s; i < start[0] + e; i++) + sb.append(matchedCharIndexed.charAt(i)); + return sb.toString(); + } } /** Index: classpath/gnu/regexp/RETokenBackRef.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenBackRef.java,v retrieving revision 1.5 diff -u -r1.5 RETokenBackRef.java --- classpath/gnu/regexp/RETokenBackRef.java 11 Mar 2006 01:39:49 -0000 1.5 +++ classpath/gnu/regexp/RETokenBackRef.java 15 Mar 2006 15:14:14 -0000 @@ -57,6 +57,8 @@ b = mymatch.start[num]; e = mymatch.end[num]; if ((b==-1)||(e==-1)) return null; // this shouldn't happen, but... 
+ if (b < 0) b += 1; + if (e < 0) e += 1; for (int i=b; i<e; i++) { char c1 = input.charAt(mymatch.index+i-b); char c2 = input.charAt(i); Index: classpath/gnu/regexp/RETokenLookAhead.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenLookAhead.java,v retrieving revision 1.4 diff -u -r1.4 RETokenLookAhead.java --- classpath/gnu/regexp/RETokenLookAhead.java 11 Mar 2006 01:39:49 -0000 1.4 +++ classpath/gnu/regexp/RETokenLookAhead.java 15 Mar 2006 15:14:14 -0000 @@ -61,7 +61,8 @@ REMatch trymatch = (REMatch)mymatch.clone(); if (re.match(input, trymatch)) { if (negative) return null; - return mymatch; + trymatch.index = mymatch.index; + return trymatch; } else { if (negative) return mymatch; Index: classpath/gnu/regexp/RETokenLookBehind.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenLookBehind.java,v retrieving revision 1.2 diff -u -r1.2 RETokenLookBehind.java --- classpath/gnu/regexp/RETokenLookBehind.java 11 Mar 2006 01:39:49 -0000 1.2 +++ classpath/gnu/regexp/RETokenLookBehind.java 15 Mar 2006 15:14:14 -0000 @@ -62,14 +62,24 @@ REMatch trymatch = (REMatch)mymatch.clone(); REMatch trymatch1 = (REMatch)mymatch.clone(); REMatch newMatch = null; - int curIndex = trymatch.index + behind.length() - input.length(); + int diff = behind.length() - input.length(); + int curIndex = trymatch.index + diff; trymatch.index = 0; RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex); REToken re1 = (REToken) re.clone(); re1.chain(stopper); if (re1.match(behind, trymatch)) { if (negative) return null; - return mymatch; + for (int i = 0; i < trymatch.start.length; i++) { + if (trymatch.start[i] != -1 && trymatch.end[i] != -1) { + trymatch.start[i] -= diff; + if (trymatch.start[i] < 0) trymatch.start[i] -= 1; + trymatch.end[i] -= diff; + if (trymatch.end[i] < 0) trymatch.end[i] -= 1; + } + } + trymatch.index = 
mymatch.index; + return trymatch; } else { if (negative) return mymatch;