From: Ito Kazumitsu <[EMAIL PROTECTED]>
Subject: [cp-patches] Re: RFC: gnu.regexp: miscellaneous fixes
Date: Sat, 25 Mar 2006 01:05:28 +0900 (JST)

> I have added some touches, and the good news is that
> all the test cases remaining in Mauve's
> gnu/testlet/java/util/regex/Pattern/testdata2
> pass now.

The previous version of this fix had a bug that caused an infinite loop
in java.util.regex.Matcher#find().  That bug has been fixed in this
version, and \G should now be usable.

ChangeLog:
2006-03-30  Ito Kazumitsu  <[EMAIL PROTECTED]>

        * gnu/regexp/CharIndexed.java(setAnchor): New method.
        * gnu/regexp/CharIndexedCharArray.java(setAnchor): New method.
        * gnu/regexp/CharIndexedInputStream.java(setAnchor): New method.
        * gnu/regexp/CharIndexedString.java(setAnchor): New method.
        * gnu/regexp/CharIndexedStringBuffer.java(setAnchor): New method.
        * gnu/regexp/CharIndexedCharSequence.java: New file.
        * gnu/regexp/RE.java(makeCharIndexed): Make a new CharIndexed
        using CharIndexedCharSequence. Use setAnchor when the input
        object is already a CharIndexed.
        * java/util/regex/Matcher.java(inputCharIndexed): New field
        to be used as a parameter of the RE#getMatch.

Index: classpath/gnu/regexp/CharIndexed.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexed.java,v
retrieving revision 1.4
diff -u -r1.4 CharIndexed.java
--- classpath/gnu/regexp/CharIndexed.java       26 Mar 2006 22:01:55 -0000      1.4
+++ classpath/gnu/regexp/CharIndexed.java       30 Mar 2006 16:48:14 -0000
@@ -108,4 +108,9 @@
      * Returns the anchor.
      */
     int getAnchor();
+
+    /**
+     * Sets the anchor.
+     */
+    void setAnchor(int anchor);
 }
Index: classpath/gnu/regexp/CharIndexedCharArray.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedCharArray.java,v
retrieving revision 1.4
diff -u -r1.4 CharIndexedCharArray.java
--- classpath/gnu/regexp/CharIndexedCharArray.java      26 Mar 2006 22:01:55 -0000      1.4
+++ classpath/gnu/regexp/CharIndexedCharArray.java      30 Mar 2006 16:48:14 -0000
@@ -77,5 +77,6 @@
     public REMatch getLastMatch() { return lastMatch; }
 
     public int getAnchor() { return anchor; }
+    public void setAnchor(int anchor) { this.anchor = anchor; } // replaces the stored anchor with the given value
 
 }
Index: classpath/gnu/regexp/CharIndexedCharSequence.java
===================================================================
RCS file: classpath/gnu/regexp/CharIndexedCharSequence.java
diff -N classpath/gnu/regexp/CharIndexedCharSequence.java
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ classpath/gnu/regexp/CharIndexedCharSequence.java   30 Mar 2006 16:48:14 -0000
@@ -0,0 +1,82 @@
+/* gnu/regexp/CharIndexedCharSequence.java
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.Serializable;
+
+class CharIndexedCharSequence implements CharIndexed, Serializable { // CharIndexed view over a CharSequence; indices are relative to 'anchor'
+    private CharSequence s; // underlying character data
+    private int anchor; // absolute position in s that relative index 0 maps to
+    private int len; // s.length() captured once in the constructor; never refreshed afterwards
+    
+    CharIndexedCharSequence(CharSequence s, int index) { // index becomes the initial anchor
+       this.s = s;
+       len = s.length();
+       anchor = index;
+    }
+
+    public char charAt(int index) { // index is relative to the current anchor
+       int pos = anchor + index; // absolute position within s
+       return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS; // OUT_OF_BOUNDS for any position outside [0, len)
+    }
+    
+    public boolean isValid() { // true while the anchor is still before the end of s
+       return (anchor < len);
+    }
+    
+    public boolean move(int index) { // side effect: advances the anchor by index
+       return ((anchor += index) < len); // reports whether the new anchor is still before the end of s
+    }
+
+    public CharIndexed lookBehind(int index, int length) { // returns a new view; this object is not modified
+       if (length > (anchor + index)) length = anchor + index; // clamp so the new anchor below cannot become negative
+       return new CharIndexedCharSequence(s, anchor + index - length); // anchored 'length' chars before absolute position anchor + index
+    }
+
+    public int length() { // number of chars from the anchor to the end of s
+       return len - anchor;
+    }
+
+    private REMatch lastMatch; // clone of the match last passed to setLastMatch
+    public void setLastMatch(REMatch match) {
+       lastMatch = (REMatch)match.clone(); // keep a clone so the fields set below do not touch the caller's object
+       lastMatch.anchor = anchor; // record the anchor in effect when the match was stored
+    }
+    public REMatch getLastMatch() { return lastMatch; } // null until setLastMatch has been called
+    public int getAnchor() { return anchor; }
+    public void setAnchor(int anchor) { this.anchor = anchor; } // repositions this view; no bounds check is performed
+}
Index: classpath/gnu/regexp/CharIndexedInputStream.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedInputStream.java,v
retrieving revision 1.5
diff -u -r1.5 CharIndexedInputStream.java
--- classpath/gnu/regexp/CharIndexedInputStream.java    26 Mar 2006 22:01:55 -0000      1.5
+++ classpath/gnu/regexp/CharIndexedInputStream.java    30 Mar 2006 16:48:14 -0000
@@ -171,5 +171,11 @@
            "difficult to support getAnchor for an input stream");
     }
 
+    public void setAnchor(int anchor) { // not supported by this class: always throws UnsupportedOperationException
+       throw new UnsupportedOperationException(
+           "difficult to support setAnchor for an input stream");
+    }
+
+
 }
 
Index: classpath/gnu/regexp/CharIndexedString.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedString.java,v
retrieving revision 1.4
diff -u -r1.4 CharIndexedString.java
--- classpath/gnu/regexp/CharIndexedString.java 26 Mar 2006 22:01:55 -0000      1.4
+++ classpath/gnu/regexp/CharIndexedString.java 30 Mar 2006 16:48:14 -0000
@@ -78,4 +78,5 @@
     }
     public REMatch getLastMatch() { return lastMatch; }
     public int getAnchor() { return anchor; }
+    public void setAnchor(int anchor) { this.anchor = anchor; } // replaces the stored anchor with the given value
 }
Index: classpath/gnu/regexp/CharIndexedStringBuffer.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedStringBuffer.java,v
retrieving revision 1.4
diff -u -r1.4 CharIndexedStringBuffer.java
--- classpath/gnu/regexp/CharIndexedStringBuffer.java   26 Mar 2006 22:01:55 -0000      1.4
+++ classpath/gnu/regexp/CharIndexedStringBuffer.java   30 Mar 2006 16:48:14 -0000
@@ -77,4 +77,5 @@
   public REMatch getLastMatch() { return lastMatch; }
 
   public int getAnchor() { return anchor; }
+  public void setAnchor(int anchor) { this.anchor = anchor; } // replaces the stored anchor with the given value
 }
Index: classpath/gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.21
diff -u -r1.21 RE.java
--- classpath/gnu/regexp/RE.java        26 Mar 2006 22:01:55 -0000      1.21
+++ classpath/gnu/regexp/RE.java        30 Mar 2006 16:48:15 -0000
@@ -2020,14 +2020,20 @@
   // This method was originally a private method, but has been made
   // public because java.util.regex.Matcher uses this.
   public static CharIndexed makeCharIndexed(Object input, int index) {
-      // We could let a String fall through to final input, but since
-      // it's the most likely input type, we check it first.
+      // We could let a String or a CharSequence fall through
+      // to final input, but since it's the most likely input type, 
+      // we check it first.
       // The case where input is already an instance of CharIndexed is
       // also supposed to be very likely.
-    if (input instanceof String)
+    if (input instanceof CharSequence)
+      return new CharIndexedCharSequence((CharSequence) input,index);
+    else if (input instanceof CharIndexed) {
+       CharIndexed ci = (CharIndexed) input;
+       ci.setAnchor(index);
+       return ci;
+    }
+    else if (input instanceof String)
       return new CharIndexedString((String) input,index);
-    else if (input instanceof CharIndexed)
-       return (CharIndexed) input; // do we lose index info?
     else if (input instanceof char[])
       return new CharIndexedCharArray((char[]) input,index);
     else if (input instanceof StringBuffer)
Index: classpath/java/util/regex/Matcher.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/util/regex/Matcher.java,v
retrieving revision 1.15
diff -u -r1.15 Matcher.java
--- classpath/java/util/regex/Matcher.java      28 Mar 2006 17:07:52 -0000      1.15
+++ classpath/java/util/regex/Matcher.java      30 Mar 2006 16:48:15 -0000
@@ -40,6 +40,7 @@
 
 import gnu.regexp.RE;
 import gnu.regexp.REMatch;
+import gnu.regexp.CharIndexed;
 
 /**
  * Instance of a regular expression applied to a char sequence.
@@ -50,6 +51,10 @@
 {
   private Pattern pattern;
   private CharSequence input;
+  // We use CharIndexed as an input object to the getMatch method in order
+  // that /\G/ (the end of the previous match) may work.  The information
+  // of the previous match is stored in the CharIndexed object.
+  private CharIndexed inputCharIndexed;
   private int position;
   private int appendPosition;
   private REMatch match;
@@ -58,6 +63,7 @@
   {
     this.pattern = pattern;
     this.input = input;
+    this.inputCharIndexed = RE.makeCharIndexed(input, 0);
   }
   
   /**
@@ -119,7 +125,7 @@
   public boolean find ()
   {
     boolean first = (match == null);
-    match = pattern.getRE().getMatch(input, position);
+    match = pattern.getRE().getMatch(inputCharIndexed, position);
     if (match != null)
       {
        int endIndex = match.getEndIndex();
@@ -150,7 +156,7 @@
    */
   public boolean find (int start)
   {
-    match = pattern.getRE().getMatch(input, start);
+    match = pattern.getRE().getMatch(inputCharIndexed, start);
     if (match != null)
       {
        position = match.getEndIndex();
@@ -212,7 +218,7 @@
  
   public boolean lookingAt ()
   {
-    match = pattern.getRE().getMatch(input, 0);
+    match = pattern.getRE().getMatch(inputCharIndexed, 0);
     if (match != null)
       {
        if (match.getStartIndex() == 0)
@@ -237,7 +243,7 @@
    */
   public boolean matches ()
   {
-    match = pattern.getRE().getMatch(input, 0, RE.REG_TRY_ENTIRE_MATCH);
+    match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_TRY_ENTIRE_MATCH);
     if (match != null)
       {
        if (match.getStartIndex() == 0)

Reply via email to