This patch implements the region-based matching
methods in java.util.regex.  There are still a number
of minor bugs, due to missing features in our regex
implementation, but I'd prefer to keep those fixes
for further patches, as this one already alters things
a great deal and does at least implement all the required
methods and track the state correctly.

ChangeLog:

2008-05-11  Andrew John Hughes  <[EMAIL PROTECTED]>

        * java/util/regex/Matcher.java:
        (regionStart): New variable.
        (regionEnd): Likewise.
        (transparentBounds): Likewise.
        (anchoringBounds): Likewise.
        (Matcher()): Initialise new variables.
        (find()): Alter to use new settings.
        (find(int)): Likewise.
        (lookingAt()): Likewise.
        (matches()): Likewise.
        (reset()): Reset region.
        (reset(CharSequence)): Documented.
        (toString()): Include new variables.
        (region(int,int)): Implemented.
        (regionStart()): Likewise.
        (regionEnd()): Likewise.
        (hasTransparentBounds()): Likewise.
        (useTransparentBounds(boolean)): Likewise.
        (hasAnchoringBounds()): Likewise.
        (useAnchoringBounds(boolean)): Likewise.

-- 
Andrew :)

Support Free Java!
Contribute to GNU Classpath and the OpenJDK
http://www.gnu.org/software/classpath
http://openjdk.java.net
PGP Key: 94EFD9D8 (http://subkeys.pgp.net)
Fingerprint = F8EF F1EA 401E 2E60 15FA  7927 142C 2591 94EF D9D8
Index: java/util/regex/Matcher.java
===================================================================
RCS file: /sources/classpath/classpath/java/util/regex/Matcher.java,v
retrieving revision 1.20
diff -u -r1.20 Matcher.java
--- java/util/regex/Matcher.java        16 Mar 2008 22:44:41 -0000      1.20
+++ java/util/regex/Matcher.java        12 May 2008 20:34:34 -0000
@@ -61,11 +61,45 @@
   private int appendPosition;
   private REMatch match;
 
+  /**
+   * The start of the region of the input on which to match.
+   */
+  private int regionStart;
+
+  /**
+   * The end of the region of the input on which to match.
+   */
+  private int regionEnd;
+  
+  /**
+   * True if the match process should look beyond the 
+   * region marked by regionStart to regionEnd when
+   * performing lookAhead, lookBehind and boundary
+   * matching.
+   */
+  private boolean transparentBounds;
+
+  /**
+   * The flags that affect the anchoring bounds.
+   * If {@link #hasAnchoringBounds()} is {@code true},
+   * the match process will honour the
+   * anchoring bounds: ^, \A, \Z, \z and $.  If
+   * {@link #hasAnchoringBounds()} is {@code false},
+   * the anchors are ignored and appropriate flags,
+   * stored in this variable, are used to provide this
+   * behaviour.
+   */
+  private int anchoringBounds;
+
   Matcher(Pattern pattern, CharSequence input)
   {
     this.pattern = pattern;
     this.input = input;
     this.inputCharIndexed = RE.makeCharIndexed(input, 0);
+    regionStart = 0;
+    regionEnd = input.length();
+    transparentBounds = false;
+    anchoringBounds = 0;
   }
   
   /**
@@ -127,7 +161,11 @@
   public boolean find ()
   {
     boolean first = (match == null);
-    match = pattern.getRE().getMatch(inputCharIndexed, position);
+    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+      match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds);
+    else
+      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
+                                      position, anchoringBounds);
     if (match != null)
       {
        int endIndex = match.getEndIndex();
@@ -158,7 +196,11 @@
    */
   public boolean find (int start)
   {
-    match = pattern.getRE().getMatch(inputCharIndexed, start);
+    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+      match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds);
+    else
+      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
+                                      start, anchoringBounds);
     if (match != null)
       {
        position = match.getEndIndex();
@@ -220,7 +262,12 @@
  
   public boolean lookingAt ()
   {
-    match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_FIX_STARTING_POSITION, null);
+    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+      match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
+                                      anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
+    else
+      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
+                                      anchoringBounds|RE.REG_FIX_STARTING_POSITION);
     if (match != null)
       {
        if (match.getStartIndex() == 0)
@@ -245,7 +292,12 @@
    */
   public boolean matches ()
   {
-    match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION, null);
+    if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
+      match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
+                                      anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
+    else
+      match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
+                                      anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION);
     if (match != null)
       {
        if (match.getStartIndex() == 0)
@@ -267,15 +319,39 @@
     return pattern;
   }
   
+  /**
+   * Resets the internal state of the matcher, including
+   * resetting the region to its default state of encompassing
+   * the whole input.  The state of {@link #hasTransparentBounds()}
+   * and {@link #hasAnchoringBounds()} are unaffected.
+   *
+   * @return a reference to this matcher.
+   * @see #regionStart()
+   * @see #regionEnd()
+   * @see #hasTransparentBounds()
+   * @see #hasAnchoringBounds()
+   */
   public Matcher reset ()
   {
     position = 0;
     match = null;
+    regionStart = 0;
+    regionEnd = input.length();
     return this;
   }
   
   /**
-   * @param input The new input character sequence
+   * Resets the internal state of the matcher, including
+   * resetting the region to its default state of encompassing
+   * the whole input.  The state of {@link #hasTransparentBounds()}
+   * and {@link #hasAnchoringBounds()} are unaffected.
+   *
+   * @param input The new input character sequence.
+   * @return a reference to this matcher.
+   * @see #regionStart()
+   * @see #regionEnd()
+   * @see #hasTransparentBounds()
+   * @see #hasAnchoringBounds()
    */
   public Matcher reset (CharSequence input)
   {
@@ -285,7 +361,7 @@
   }
   
   /**
-   * @returns the index of a capturing group in this matcher's pattern
+   * @return the index of a capturing group in this matcher's pattern
    *
    * @exception IllegalStateException If no match has yet been attempted,
    * or if the previous match operation failed
@@ -314,6 +390,7 @@
 
   /**
    * @return True if and only if the matcher hit the end of input.
+   * @since 1.5
    */
   public boolean hitEnd()
   {
@@ -328,7 +405,9 @@
     CPStringBuilder sb = new CPStringBuilder();
     sb.append(this.getClass().getName())
       .append("[pattern=").append(pattern.pattern())
-      .append(" region=").append("0").append(",").append(input.length())
+      .append(" region=").append(regionStart).append(",").append(regionEnd)
+      .append(" anchoringBounds=").append(anchoringBounds == 0)
+      .append(" transparentBounds=").append(transparentBounds)
       .append(" lastmatch=").append(match == null ? "" : match.toString())
       .append("]");
     return sb.toString();
@@ -338,4 +417,177 @@
   {
     if (match == null) throw new IllegalStateException();
   }
+
+  /**
+   * <p>
+   * Defines the region of the input on which to match.
+   * By default, the {@link Matcher} attempts to match
+   * the whole string (from 0 to the length of the input),
+   * but a region between {@code start} (inclusive) and
+   * {@code end} (exclusive) on which to match may instead
+   * be defined using this method.
+   * </p>
+   * <p>
+   * The behaviour of region matching is further affected
+   * by the use of transparent or opaque bounds (see
+   * {@link #useTransparentBounds(boolean)}) and whether or not
+   * anchors ({@code ^} and {@code $}) are in use
+   * (see {@link #useAnchoringBounds(boolean)}).  With transparent
+   * bounds, the matcher is aware of input outside the bounds
+   * set by this method, whereas, with opaque bounds (the default)
+   * only the input within the bounds is used.  The use of
+   * anchors is affected by this setting; with transparent
+   * bounds, anchors will match the beginning of the real input,
+   * while with opaque bounds they match the beginning of the
+   * region.  {@link #useAnchoringBounds(boolean)} can be used
+   * to turn on or off the matching of anchors.
+   * </p>
+   *
+   * @param start the start of the region (inclusive).
+   * @param end the end of the region (exclusive).
+   * @return a reference to this matcher.
+   * @throws IndexOutOfBoundsException if either {@code start} or
+   *                                   {@code end} are less than zero,
+   *                                   if either {@code start} or
+   *                                   {@code end} are greater than the
+   *                                   length of the input, or if
+   *                                   {@code start} is greater than
+   *                                   {@code end}.
+   * @see #regionStart()
+   * @see #regionEnd()
+   * @see #hasTransparentBounds()
+   * @see #useTransparentBounds(boolean)
+   * @see #hasAnchoringBounds()
+   * @see #useAnchoringBounds(boolean)
+   * @since 1.5
+   */
+  public Matcher region(int start, int end)
+  {
+    int length = input.length();
+    if (start < 0)
+      throw new IndexOutOfBoundsException("The start position was less than zero.");
+    if (start >= length)
+      throw new IndexOutOfBoundsException("The start position is after the end of the input.");
+    if (end < 0)
+      throw new IndexOutOfBoundsException("The end position was less than zero.");
+    if (end > length)
+      throw new IndexOutOfBoundsException("The end position is after the end of the input.");
+    if (start > end)
+      throw new IndexOutOfBoundsException("The start position is after the end position.");
+    reset();
+    regionStart = start;
+    regionEnd = end;
+    return this;
+  }
+
+  /**
+   * The start of the region on which to perform matches (inclusive).
+   *
+   * @return the start index of the region.
+   * @see #region(int,int)
+   * @see #regionEnd()
+   * @since 1.5
+   */
+  public int regionStart()
+  {
+    return regionStart;
+  }
+  
+  /**
+   * The end of the region on which to perform matches (exclusive).
+   *
+   * @return the end index of the region.
+   * @see #region(int,int)
+   * @see #regionStart()
+   * @since 1.5
+   */
+  public int regionEnd()
+  {
+    return regionEnd;
+  }
+
+  /**
+   * Returns true if the bounds of the region marked by
+   * {@link #regionStart()} and {@link #regionEnd()} are
+   * transparent.  When these bounds are transparent, the
+   * matching process can look beyond them to perform
+   * lookahead, lookbehind and boundary matching operations.
+   * By default, the bounds are opaque.
+   *
+   * @return true if the bounds of the matching region are
+   *         transparent.
+   * @see #useTransparentBounds(boolean)
+   * @see #region(int,int)
+   * @see #regionStart()
+   * @see #regionEnd()
+   * @since 1.5
+   */
+  public boolean hasTransparentBounds()
+  {
+    return transparentBounds;
+  }
+
+  /**
+   * Sets the transparency of the bounds of the region
+   * marked by {@link #regionStart()} and {@link #regionEnd()}.
+   * A value of {@code true} makes the bounds transparent,
+   * so the matcher can see beyond them to perform lookahead,
+   * lookbehind and boundary matching operations.  A value
+   * of {@code false} (the default) makes the bounds opaque,
+   * restricting the match to the input region denoted
+   * by {@link #regionStart()} and {@link #regionEnd()}.
+   *
+   * @param transparent true if the bounds should be transparent.
+   * @return a reference to this matcher.
+   * @see #hasTransparentBounds()
+   * @see #region(int,int)
+   * @see #regionStart()
+   * @see #regionEnd()
+   * @since 1.5
+   */
+  public Matcher useTransparentBounds(boolean transparent)
+  {
+    transparentBounds = transparent;
+    return this;
+  }
+
+  /**
+   * Returns true if the matcher will honour the use of
+   * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z},
+   * {@code \z} and {@code $}.  By default, the anchors
+   * are used.  Note that the effect of the anchors is
+   * also affected by {@link #hasTransparentBounds()}.
+   *
+   * @return true if the matcher will attempt to match
+   *         the anchoring bounds.
+   * @see #useAnchoringBounds(boolean)
+   * @see #hasTransparentBounds()
+   * @since 1.5
+   */
+  public boolean hasAnchoringBounds()
+  {
+    return anchoringBounds == 0;
+  }
+
+  /**
+   * Enables or disables the use of the anchoring bounds:
+   * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and
+   * {@code $}. By default, their use is enabled.  When
+   * disabled, the matcher will not attempt to match
+   * the anchors.
+   *
+   * @param useAnchors true if anchoring bounds should be used.
+   * @return a reference to this matcher.
+   * @since 1.5
+   * @see #hasAnchoringBounds()
+   */
+  public Matcher useAnchoringBounds(boolean useAnchors)
+  {
+    if (useAnchors)
+      anchoringBounds = 0;
+    else
+      anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL;
+    return this;
+  }
+
 }

Reply via email to