lang StringUtils.java

stevencaswell Sun, 11 Jul 2004 09:49:08 -0700

stevencaswell    2004/07/11 09:48:32

  Modified:    lang/src/java/org/apache/commons/lang StringUtils.java
  Log:
  http://issues.apache.org/bugzilla/show_bug.cgi?id=22692 :
  - added new splitPreserveAllTokens methods to mirror the split functionality, 
preserving empty tokens indicated by adjacent separators;
  - refactored logic of existing split method into splitWorker for sharing by new 
splitPreserveAllTokens methods
  
  Revision  Changes    Path
  1.131     +219 -28   
jakarta-commons/lang/src/java/org/apache/commons/lang/StringUtils.java
  
  Index: StringUtils.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/StringUtils.java,v
  retrieving revision 1.130
  retrieving revision 1.131
  diff -u -r1.130 -r1.131
  --- StringUtils.java  24 May 2004 20:15:44 -0000      1.130
  +++ StringUtils.java  11 Jul 2004 16:48:31 -0000      1.131
  @@ -1994,6 +1994,150 @@
        * @since 2.0
        */
       public static String[] split(String str, char separatorChar) {
  +        return splitWorker(str, separatorChar, false);
  +    }
  +
  +    /**
  +     * <p>Splits the provided text into an array, separators specified.
  +     * This is an alternative to using StringTokenizer.</p>
  +     *
  +     * <p>The separator is not included in the returned String array.
  +     * Adjacent separators are treated as one separator.
  +     * For more control over the split use the Tokenizer class.</p>
  +     *
  +     * <p>A <code>null</code> input String returns <code>null</code>.
  +     * A <code>null</code> separatorChars splits on whitespace.</p>
  +     *
  +     * <pre>
  +     * StringUtils.split(null, *)         = null
  +     * StringUtils.split("", *)           = []
  +     * StringUtils.split("abc def", null) = ["abc", "def"]
  +     * StringUtils.split("abc def", " ")  = ["abc", "def"]
  +     * StringUtils.split("abc  def", " ") = ["abc", "def"]
  +     * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"]
  +     * </pre>
  +     *
  +     * @param str  the String to parse, may be null
  +     * @param separatorChars  the characters used as the delimiters,
  +     *  <code>null</code> splits on whitespace
  +     * @return an array of parsed Strings, <code>null</code> if null String input
  +     */
  +    public static String[] split(String str, String separatorChars) {
  +        return splitWorker(str, separatorChars, -1, false);
  +    }
  +
  +    /**
  +     * <p>Splits the provided text into an array with a maximum length,
  +     * separators specified.</p>
  +     *
  +     * <p>The separator is not included in the returned String array.
  +     * Adjacent separators are treated as one separator.</p>
  +     *
  +     * <p>A <code>null</code> input String returns <code>null</code>.
  +     * A <code>null</code> separatorChars splits on whitespace.</p>
  +     *
  +     * <p>If more than <code>max</code> delimited substrings are found, the last
  +     * returned string includes all characters after the first <code>max - 1</code>
  +     * returned strings (including separator characters).</p>
  +     *
  +     * <pre>
  +     * StringUtils.split(null, *, *)            = null
  +     * StringUtils.split("", *, *)              = []
  +     * StringUtils.split("ab de fg", null, 0)   = ["ab", "de", "fg"]
  +     * StringUtils.split("ab   de fg", null, 0) = ["ab", "de", "fg"]
  +     * StringUtils.split("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
  +     * StringUtils.split("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
  +     * </pre>
  +     *
  +     * @param str  the String to parse, may be null
  +     * @param separatorChars  the characters used as the delimiters,
  +     *  <code>null</code> splits on whitespace
  +     * @param max  the maximum number of elements to include in the
  +     *  array. A zero or negative value implies no limit
  +     * @return an array of parsed Strings, <code>null</code> if null String input
  +     */
  +    public static String[] split(String str, String separatorChars, int max) {
  +        return splitWorker(str, separatorChars, max, false);
  +    }
  +
  +    //-----------------------------------------------------------------------
  +    /**
  +     * <p>Splits the provided text into an array, using whitespace as the
  +     * separator, preserving all tokens, including empty tokens created by 
  +     * adjacent separators. This is an alternative to using StringTokenizer.
  +     * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
  +     *
  +     * <p>The separator is not included in the returned String array.
  +     * Adjacent separators are treated as separators for empty tokens.
  +     * For more control over the split use the Tokenizer class.</p>
  +     *
  +     * <p>A <code>null</code> input String returns <code>null</code>.</p>
  +     *
  +     * <pre>
  +     * StringUtils.splitPreserveAllTokens(null)       = null
  +     * StringUtils.splitPreserveAllTokens("")         = []
  +     * StringUtils.splitPreserveAllTokens("abc def")  = ["abc", "def"]
  +     * StringUtils.splitPreserveAllTokens("abc  def") = ["abc", "", "def"]
  +     * StringUtils.splitPreserveAllTokens(" abc ")    = ["", "abc", ""]
  +     * </pre>
  +     *
  +     * @param str  the String to parse, may be <code>null</code>
  +     * @return an array of parsed Strings, <code>null</code> if null String input
  +     * @since 2.1
  +     */
  +    public static String[] splitPreserveAllTokens(String str) {
  +        return splitWorker(str, null, -1, true);
  +    }
  +
  +    /**
  +     * <p>Splits the provided text into an array, separator specified,
  +     * preserving all tokens, including empty tokens created by adjacent
  +     * separators. This is an alternative to using StringTokenizer.</p>
  +     *
  +     * <p>The separator is not included in the returned String array.
  +     * Adjacent separators are treated as separators for empty tokens.
  +     * For more control over the split use the Tokenizer class.</p>
  +     *
  +     * <p>A <code>null</code> input String returns <code>null</code>.</p>
  +     *
  +     * <pre>
  +     * StringUtils.splitPreserveAllTokens(null, *)         = null
  +     * StringUtils.splitPreserveAllTokens("", *)           = []
  +     * StringUtils.splitPreserveAllTokens("a.b.c", '.')    = ["a", "b", "c"]
  +     * StringUtils.splitPreserveAllTokens("a..b.c", '.')   = ["a", "", "b", "c"]
  +     * StringUtils.splitPreserveAllTokens("a:b:c", '.')    = ["a:b:c"]
  +     * StringUtils.splitPreserveAllTokens("a\tb\nc", '\t') = ["a", "b\nc"]
  +     * StringUtils.splitPreserveAllTokens("a b c", ' ')    = ["a", "b", "c"]
  +     * StringUtils.splitPreserveAllTokens("a b c ", ' ')   = ["a", "b", "c", ""]
  +     * StringUtils.splitPreserveAllTokens("a b c  ", ' ')  = ["a", "b", "c", "", ""]
  +     * StringUtils.splitPreserveAllTokens(" a b c", ' ')   = ["", "a", "b", "c"]
  +     * StringUtils.splitPreserveAllTokens("  a b c", ' ')  = ["", "", "a", "b", "c"]
  +     * StringUtils.splitPreserveAllTokens(" a b c ", ' ')  = ["", "a", "b", "c", ""]
  +     * </pre>
  +     *
  +     * @param str  the String to parse, may be <code>null</code>
  +     * @param separatorChar  the character used as the delimiter
  +     *  (a primitive <code>char</code>, so it can never be <code>null</code>)
  +     * @return an array of parsed Strings, <code>null</code> if null String input
  +     * @since 2.1
  +     */
  +    public static String[] splitPreserveAllTokens(String str, char separatorChar) {
  +        return splitWorker(str, separatorChar, true);
  +    }
  +
  +    /**
  +     * Performs the logic for the <code>split</code> and 
  +     * <code>splitPreserveAllTokens</code> methods that do not limit the
  +     * length of the returned array.
  +     *
  +     * @param str  the String to parse, may be <code>null</code>
  +     * @param separatorChar the separator character
  +     * @param preserveAllTokens if <code>true</code>, adjacent separators are
  +     * treated as empty token separators; if <code>false</code>, adjacent
  +     * separators are treated as one separator.
  +     * @return an array of parsed Strings, <code>null</code> if null String input
  +     */
  +    private static String[] splitWorker(String str, char separatorChar, boolean 
preserveAllTokens) {
           // Performance tuned for 2.0 (JDK1.4)
   
           if (str == null) {
  @@ -2006,58 +2150,71 @@
           List list = new ArrayList();
           int i = 0, start = 0;
           boolean match = false;
  +        boolean lastMatch = false;
           while (i < len) {
               if (str.charAt(i) == separatorChar) {
  -                if (match) {
  +                if (match || preserveAllTokens) {
                       list.add(str.substring(start, i));
                       match = false;
  +                    lastMatch = true;
                   }
                   start = ++i;
                   continue;
  +            } else {
  +                lastMatch = false;
               }
               match = true;
               i++;
           }
  -        if (match) {
  +        if (match || (preserveAllTokens && lastMatch)) {
               list.add(str.substring(start, i));
           }
           return (String[]) list.toArray(new String[list.size()]);
       }
   
       /**
  -     * <p>Splits the provided text into an array, separators specified.
  -     * This is an alternative to using StringTokenizer.</p>
  +     * <p>Splits the provided text into an array, separators specified, 
  +     * preserving all tokens, including empty tokens created by adjacent
  +     * separators. This is an alternative to using StringTokenizer.</p>
        *
        * <p>The separator is not included in the returned String array.
  -     * Adjacent separators are treated as one separator.
  +     * Adjacent separators are treated as separators for empty tokens.
        * For more control over the split use the Tokenizer class.</p>
        *
        * <p>A <code>null</code> input String returns <code>null</code>.
        * A <code>null</code> separatorChars splits on whitespace.</p>
        *
        * <pre>
  -     * StringUtils.split(null, *)         = null
  -     * StringUtils.split("", *)           = []
  -     * StringUtils.split("abc def", null) = ["abc", "def"]
  -     * StringUtils.split("abc def", " ")  = ["abc", "def"]
  -     * StringUtils.split("abc  def", " ") = ["abc", "def"]
  -     * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"]
  +     * StringUtils.splitPreserveAllTokens(null, *)           = null
  +     * StringUtils.splitPreserveAllTokens("", *)             = []
  +     * StringUtils.splitPreserveAllTokens("abc def", null)   = ["abc", "def"]
  +     * StringUtils.splitPreserveAllTokens("abc def", " ")    = ["abc", "def"]
  +     * StringUtils.splitPreserveAllTokens("abc  def", " ")   = ["abc", "", "def"]
  +     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":")   = ["ab", "cd", "ef"]
  +     * StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":")  = ["ab", "cd", "ef", 
""]
  +     * StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", 
"", ""]
  +     * StringUtils.splitPreserveAllTokens("ab::cd:ef", ":")  = ["ab", "", "cd", "ef"]
  +     * StringUtils.splitPreserveAllTokens(":cd:ef", ":")     = ["", "cd", "ef"]
  +     * StringUtils.splitPreserveAllTokens("::cd:ef", ":")    = ["", "", "cd", "ef"]
  +     * StringUtils.splitPreserveAllTokens(":cd:ef:", ":")    = ["", "cd", "ef", ""]
        * </pre>
        *
  -     * @param str  the String to parse, may be null
  +     * @param str  the String to parse, may be <code>null</code>
        * @param separatorChars  the characters used as the delimiters,
        *  <code>null</code> splits on whitespace
        * @return an array of parsed Strings, <code>null</code> if null String input
        */
  -    public static String[] split(String str, String separatorChars) {
  -        return split(str, separatorChars, -1);
  +    public static String[] splitPreserveAllTokens(String str, String 
separatorChars) {
  +        return splitWorker(str, separatorChars, -1, true);
       }
   
       /**
        * <p>Splits the provided text into an array with a maximum length,
  -     * separators specified.</p>
  +     * separators specified, preserving all tokens, including empty tokens 
  +     * created by adjacent separators.</p>
        *
        * <p>The separator is not included in the returned String array.
  +     * Adjacent separators are treated as separators for empty tokens.
        * Adjacent separators are treated as one separator.</p>
        *
        * <p>A <code>null</code> input String returns <code>null</code>.
  @@ -2068,22 +2225,43 @@
        * returned strings (including separator characters).</p>
        *
        * <pre>
  -     * StringUtils.split(null, *, *)            = null
  -     * StringUtils.split("", *, *)              = []
  -     * StringUtils.split("ab de fg", null, 0)   = ["ab", "cd", "ef"]
  -     * StringUtils.split("ab   de fg", null, 0) = ["ab", "cd", "ef"]
  -     * StringUtils.split("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
  -     * StringUtils.split("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
  +     * StringUtils.splitPreserveAllTokens(null, *, *)            = null
  +     * StringUtils.splitPreserveAllTokens("", *, *)              = []
  +     * StringUtils.splitPreserveAllTokens("ab de fg", null, 0)   = ["ab", "de", 
"fg"]
  +     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 0) = ["ab", "", "", 
"de", "fg"]
  +     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0)    = ["ab", "cd", 
"ef"]
  +     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
  +     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 2) = ["ab", "  de fg"]
  +     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 3) = ["ab", "", " de 
fg"]
  +     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 4) = ["ab", "", "", 
"de fg"]
        * </pre>
        *
  -     * @param str  the String to parse, may be null
  +     * @param str  the String to parse, may be <code>null</code>
        * @param separatorChars  the characters used as the delimiters,
        *  <code>null</code> splits on whitespace
        * @param max  the maximum number of elements to include in the
        *  array. A zero or negative value implies no limit
        * @return an array of parsed Strings, <code>null</code> if null String input
        */
  -    public static String[] split(String str, String separatorChars, int max) {
  +    public static String[] splitPreserveAllTokens(String str, String 
separatorChars, int max) {
  +        return splitWorker(str, separatorChars, max, true);
  +    }
  +
  +    /**
  +     * Performs the logic for the <code>split</code> and 
  +     * <code>splitPreserveAllTokens</code> methods that limit the returned
  +     * array to a maximum length.
  +     *
  +     * @param str  the String to parse, may be <code>null</code>
  +     * @param separatorChars the separator characters, <code>null</code> splits on whitespace
  +     * @param max  the maximum number of elements to include in the
  +     *  array. A zero or negative value implies no limit.
  +     * @param preserveAllTokens if <code>true</code>, adjacent separators are
  +     * treated as empty token separators; if <code>false</code>, adjacent
  +     * separators are treated as one separator.
  +     * @return an array of parsed Strings, <code>null</code> if null String input
  +     */
  +    private static String[] splitWorker(String str, String separatorChars, int max, 
boolean preserveAllTokens) {
           // Performance tuned for 2.0 (JDK1.4)
           // Direct code is quicker than StringTokenizer.
           // Also, StringTokenizer uses isSpace() not isWhitespace()
  @@ -2099,19 +2277,24 @@
           int sizePlus1 = 1;
           int i = 0, start = 0;
           boolean match = false;
  +        boolean lastMatch = false;
           if (separatorChars == null) {
               // Null separator means use whitespace
               while (i < len) {
                   if (Character.isWhitespace(str.charAt(i))) {
  -                    if (match) {
  +                    if (match || preserveAllTokens) {
  +                        lastMatch = true;
                           if (sizePlus1++ == max) {
                               i = len;
  +                            lastMatch = false;
                           }
                           list.add(str.substring(start, i));
                           match = false;
                       }
                       start = ++i;
                       continue;
  +                } else {
  +                    lastMatch = false;
                   }
                   match = true;
                   i++;
  @@ -2121,15 +2304,19 @@
               char sep = separatorChars.charAt(0);
               while (i < len) {
                   if (str.charAt(i) == sep) {
  -                    if (match) {
  +                    if (match || preserveAllTokens) {
  +                        lastMatch = true;
                           if (sizePlus1++ == max) {
                               i = len;
  +                            lastMatch = false;
                           }
                           list.add(str.substring(start, i));
                           match = false;
                       }
                       start = ++i;
                       continue;
  +                } else {
  +                    lastMatch = false;
                   }
                   match = true;
                   i++;
  @@ -2138,21 +2325,25 @@
               // standard case
               while (i < len) {
                   if (separatorChars.indexOf(str.charAt(i)) >= 0) {
  -                    if (match) {
  +                    if (match || preserveAllTokens) {
  +                        lastMatch = true;
                           if (sizePlus1++ == max) {
                               i = len;
  +                            lastMatch = false;
                           }
                           list.add(str.substring(start, i));
                           match = false;
                       }
                       start = ++i;
                       continue;
  +                } else {
  +                    lastMatch = false;
                   }
                   match = true;
                   i++;
               }
           }
  -        if (match) {
  +        if (match || (preserveAllTokens && lastMatch)) {
               list.add(str.substring(start, i));
           }
           return (String[]) list.toArray(new String[list.size()]);


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-commons/lang/src/java/org/apache/commons/lang StringUtils.java

Reply via email to