ChangeLog: 2006-07-21 Ito Kazumitsu <[EMAIL PROTECTED]> Fixes bug #28413 * gnu/java/util/regex/RETokenEnd.java(check_java_line_terminators): New field. (RETokenEnd): New constructor to set check_java_line_terminators. (matchThis): Check line terminators if check_java_line_terminators is set. * gnu/java/util/regex/RETokenStart.java: Likewise. * gnu/java/util/regex/RE.java(initialize): Use the new constructors for RETokenEnd and RETokenStart if REG_MULTILINE is set. * java/util/regex/Pattern.java(Pattern): Changed so that gnu/java/util/regex/RE.java may use the new constructors.
Index: classpath/gnu/java/util/regex/RE.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/util/regex/RE.java,v retrieving revision 1.2 diff -u -r1.2 RE.java --- classpath/gnu/java/util/regex/RE.java 19 Jul 2006 19:47:07 -0000 1.2 +++ classpath/gnu/java/util/regex/RE.java 21 Jul 2006 17:44:23 -0000 @@ -750,7 +750,20 @@ else if ((unit.ch == '^') && !(unit.bk || quot)) { addToken(currentToken); currentToken = null; - addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null)); + RETokenStart token = null; + if ((cflags & REG_MULTILINE) > 0) { + String sep = syntax.getLineSeparator(); + if (sep == null) { + token = new RETokenStart(subIndex, null, true); + } + else { + token = new RETokenStart(subIndex, sep); + } + } + else { + token = new RETokenStart(subIndex, null); + } + addToken(token); } // END OF LINE OPERATOR @@ -759,7 +772,20 @@ else if ((unit.ch == '$') && !(unit.bk || quot)) { addToken(currentToken); currentToken = null; - addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? 
syntax.getLineSeparator() : null)); + RETokenEnd token = null; + if ((cflags & REG_MULTILINE) > 0) { + String sep = syntax.getLineSeparator(); + if (sep == null) { + token = new RETokenEnd(subIndex, null, true); + } + else { + token = new RETokenEnd(subIndex, sep); + } + } + else { + token = new RETokenEnd(subIndex, null); + } + addToken(token); } // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null) Index: classpath/gnu/java/util/regex/RETokenEnd.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/util/regex/RETokenEnd.java,v retrieving revision 1.1 diff -u -r1.1 RETokenEnd.java --- classpath/gnu/java/util/regex/RETokenEnd.java 7 Jun 2006 19:30:06 -0000 1.1 +++ classpath/gnu/java/util/regex/RETokenEnd.java 21 Jul 2006 17:44:23 -0000 @@ -43,10 +43,18 @@ * Indicates whether this token should match on a line break. */ private String newline; + private boolean check_java_line_terminators; RETokenEnd(int subIndex,String newline) { super(subIndex); this.newline = newline; + this.check_java_line_terminators = false; + } + + RETokenEnd(int subIndex, String newline, boolean b) { + super(subIndex); + this.newline = newline; + this.check_java_line_terminators = b; } int getMaximumLength() { @@ -58,6 +66,18 @@ if (ch == CharIndexed.OUT_OF_BOUNDS) return ((mymatch.eflags & RE.REG_NOTEOL)>0) ? 
null : mymatch; + if (check_java_line_terminators) { + if (ch == '\n') { + char ch1 = input.charAt(mymatch.index - 1); + if (ch1 == '\r') return null; + return mymatch; + } + if (ch == '\r') return mymatch; + if (ch == '\u0085') return mymatch; // A next-line character + if (ch == '\u2028') return mymatch; // A line-separator character + if (ch == '\u2029') return mymatch; // A paragraph-separator character + return null; + } if (newline != null) { char z; int i = 0; // position in newline Index: classpath/gnu/java/util/regex/RETokenStart.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/java/util/regex/RETokenStart.java,v retrieving revision 1.1 diff -u -r1.1 RETokenStart.java --- classpath/gnu/java/util/regex/RETokenStart.java 7 Jun 2006 19:30:06 -0000 1.1 +++ classpath/gnu/java/util/regex/RETokenStart.java 21 Jul 2006 17:44:23 -0000 @@ -39,10 +39,18 @@ class RETokenStart extends REToken { private String newline; // matches after a newline + private boolean check_java_line_terminators; RETokenStart(int subIndex, String newline) { super(subIndex); this.newline = newline; + this.check_java_line_terminators = false; + } + + RETokenStart(int subIndex, String newline, boolean b) { + super(subIndex); + this.newline = newline; + this.check_java_line_terminators = b; } int getMaximumLength() { @@ -53,6 +61,21 @@ // charAt(index-n) may be unknown on a Reader/InputStream. 
FIXME // Match after a newline if in multiline mode + if (check_java_line_terminators) { + char ch = input.charAt(mymatch.index - 1); + if (ch != CharIndexed.OUT_OF_BOUNDS) { + if (ch == '\n') return mymatch; + if (ch == '\r') { + char ch1 = input.charAt(mymatch.index); + if (ch1 != '\n') return mymatch; + return null; + } + if (ch == '\u0085') return mymatch; // A next-line character + if (ch == '\u2028') return mymatch; // A line-separator character + if (ch == '\u2029') return mymatch; // A paragraph-separator character + } + } + if (newline != null) { int len = newline.length(); if (mymatch.offset >= len) { Index: classpath/java/util/regex/Pattern.java =================================================================== RCS file: /cvsroot/classpath/classpath/java/util/regex/Pattern.java,v retrieving revision 1.17 diff -u -r1.17 Pattern.java --- classpath/java/util/regex/Pattern.java 7 Jun 2006 19:30:06 -0000 1.17 +++ classpath/java/util/regex/Pattern.java 21 Jul 2006 17:44:23 -0000 @@ -73,12 +73,17 @@ this.regex = regex; this.flags = flags; + RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4; int gnuFlags = 0; gnuFlags |= RE.REG_ICASE_USASCII; if ((flags & CASE_INSENSITIVE) != 0) gnuFlags |= RE.REG_ICASE; if ((flags & MULTILINE) != 0) - gnuFlags |= RE.REG_MULTILINE; + { + gnuFlags |= RE.REG_MULTILINE; + syntax = new RESyntax(syntax); + syntax.setLineSeparator(null); + } if ((flags & DOTALL) != 0) gnuFlags |= RE.REG_DOT_NEWLINE; if ((flags & UNICODE_CASE) != 0) @@ -86,7 +91,6 @@ // not yet supported: // if ((flags & CANON_EQ) != 0) gnuFlags = - RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4; if ((flags & UNIX_LINES) != 0) { // Use a syntax set with \n for linefeeds?