From: Ito Kazumitsu <[EMAIL PROTECTED]>
Date: Wed, 15 Mar 2006 07:07:36 +0900 (JST)
> This fix will make pass the following tests in
> gnu/testlet/java/util/regex/Pattern/testdata2:
I have made another fix so that the following tests also pass.
/(?<=(foo)a)bar/
fooabar
0: bar
1: foo
*** Failers
No match
bar
No match
foobbar
No match
/(?<=(foo))bar\1/
foobarfoo
0: barfoo
1: foo
foobarfootling
0: barfoo
1: foo
*** Failers
No match
foobar
No match
barfoo
No match
/(?>.*)(?<=(abcd|wxyz))/
alphabetabcd
0: alphabetabcd
1: abcd
endingwxyz
0: endingwxyz
1: wxyz
*** Failers
No match
a rather long string that doesn't end with one of them
No match
ChangeLog:
2006-03-16 Ito Kazumitsu <[EMAIL PROTECTED]>
* gnu/regexp/REMatch.java(matchedCharIndexed): New field,
(start, end): Added a comment about their negative values,
(finish): Saves the input text in matchedCharIndexed,
(toString): If the start or end index is out of bounds of the
matched text, get the substring from matchedCharIndexed.
Added special handling for the case where the start/end index is < -1.
* gnu/regexp/RETokenLookAhead.java(matchThis): Return the newly
found match, but keep the index as the original match.
* gnu/regexp/RETokenLookBehind.java(matchThis): Return the newly
found match, but keep the index as the original match.
* gnu/regexp/RETokenBackRef.java(matchThis): Added special handling
for the case where the start/end index is < -1.
Index: classpath/gnu/regexp/REMatch.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/REMatch.java,v
retrieving revision 1.9
diff -u -r1.9 REMatch.java
--- classpath/gnu/regexp/REMatch.java 11 Mar 2006 01:39:49 -0000 1.9
+++ classpath/gnu/regexp/REMatch.java 15 Mar 2006 15:14:14 -0000
@@ -49,6 +49,7 @@
*/
public final class REMatch implements Serializable, Cloneable {
private String matchedText;
+ private CharIndexed matchedCharIndexed;
// These variables are package scope for fast access within the engine
int eflags; // execution flags this match was made using
@@ -66,6 +67,10 @@
int index; // used while matching to mark current match position in input
int[] start; // start positions (relative to offset) for each (sub)exp.
int[] end; // end positions for the same
+ // start[i] == -1 or end[i] == -1 means that the start/end position is void.
+ // start[i] == p or end[i] == p where p < 0 and p != -1 means that
+ // the actual start/end position is (p+1). Start/end positions may
+ // become negative when the subexpression is in a RETokenLookBehind.
boolean empty; // empty string matched. This flag is used only within
// RETokenRepeated.
@@ -106,6 +111,7 @@
for (i = 0; i < end[0]; i++)
sb.append(text.charAt(i));
matchedText = sb.toString();
+ matchedCharIndexed = text;
for (i = 0; i < start.length; i++) {
// If any subexpressions didn't terminate, they don't count
// TODO check if this code ever gets hit
@@ -181,7 +187,18 @@
if ((sub >= start.length) || sub < 0)
throw new IndexOutOfBoundsException("No group " + sub);
if (start[sub] == -1) return null;
- return (matchedText.substring(start[sub],end[sub]));
+ if (start[sub] >= 0 && end[sub] <= matchedText.length())
+ return (matchedText.substring(start[sub],end[sub]));
+ else {
+ StringBuffer sb = new StringBuffer();
+ int s = start[sub];
+ int e = end[sub];
+ if (s < 0) s += 1;
+ if (e < 0) e += 1;
+ for (int i = start[0] + s; i < start[0] + e; i++)
+ sb.append(matchedCharIndexed.charAt(i));
+ return sb.toString();
+ }
}
/**
Index: classpath/gnu/regexp/RETokenBackRef.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenBackRef.java,v
retrieving revision 1.5
diff -u -r1.5 RETokenBackRef.java
--- classpath/gnu/regexp/RETokenBackRef.java 11 Mar 2006 01:39:49 -0000 1.5
+++ classpath/gnu/regexp/RETokenBackRef.java 15 Mar 2006 15:14:14 -0000
@@ -57,6 +57,8 @@
b = mymatch.start[num];
e = mymatch.end[num];
if ((b==-1)||(e==-1)) return null; // this shouldn't happen, but...
+ if (b < 0) b += 1;
+ if (e < 0) e += 1;
for (int i=b; i<e; i++) {
char c1 = input.charAt(mymatch.index+i-b);
char c2 = input.charAt(i);
Index: classpath/gnu/regexp/RETokenLookAhead.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenLookAhead.java,v
retrieving revision 1.4
diff -u -r1.4 RETokenLookAhead.java
--- classpath/gnu/regexp/RETokenLookAhead.java 11 Mar 2006 01:39:49 -0000 1.4
+++ classpath/gnu/regexp/RETokenLookAhead.java 15 Mar 2006 15:14:14 -0000
@@ -61,7 +61,8 @@
REMatch trymatch = (REMatch)mymatch.clone();
if (re.match(input, trymatch)) {
if (negative) return null;
- return mymatch;
+ trymatch.index = mymatch.index;
+ return trymatch;
}
else {
if (negative) return mymatch;
Index: classpath/gnu/regexp/RETokenLookBehind.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenLookBehind.java,v
retrieving revision 1.2
diff -u -r1.2 RETokenLookBehind.java
--- classpath/gnu/regexp/RETokenLookBehind.java 11 Mar 2006 01:39:49 -0000 1.2
+++ classpath/gnu/regexp/RETokenLookBehind.java 15 Mar 2006 15:14:14 -0000
@@ -62,14 +62,24 @@
REMatch trymatch = (REMatch)mymatch.clone();
REMatch trymatch1 = (REMatch)mymatch.clone();
REMatch newMatch = null;
- int curIndex = trymatch.index + behind.length() - input.length();
+ int diff = behind.length() - input.length();
+ int curIndex = trymatch.index + diff;
trymatch.index = 0;
RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex);
REToken re1 = (REToken) re.clone();
re1.chain(stopper);
if (re1.match(behind, trymatch)) {
if (negative) return null;
- return mymatch;
+ for (int i = 0; i < trymatch.start.length; i++) {
+ if (trymatch.start[i] != -1 && trymatch.end[i] != -1) {
+ trymatch.start[i] -= diff;
+ if (trymatch.start[i] < 0) trymatch.start[i] -= 1;
+ trymatch.end[i] -= diff;
+ if (trymatch.end[i] < 0) trymatch.end[i] -= 1;
+ }
+ }
+ trymatch.index = mymatch.index;
+ return trymatch;
}
else {
if (negative) return mymatch;