stevencaswell 2004/07/11 09:48:32 Modified: lang/src/java/org/apache/commons/lang StringUtils.java Log: http://issues.apache.org/bugzilla/show_bug.cgi?id=22692 : - added new splitPreserveAllTokens methods to mirror the split functionality, preserving empty tokens indicated by adjacent tokens; - refactored logic of existing split method into splitWorker for sharing by new splitPreserveAllTokens methods Revision Changes Path 1.131 +219 -28 jakarta-commons/lang/src/java/org/apache/commons/lang/StringUtils.java Index: StringUtils.java =================================================================== RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/StringUtils.java,v retrieving revision 1.130 retrieving revision 1.131 diff -u -r1.130 -r1.131 --- StringUtils.java 24 May 2004 20:15:44 -0000 1.130 +++ StringUtils.java 11 Jul 2004 16:48:31 -0000 1.131 @@ -1994,6 +1994,150 @@ * @since 2.0 */ public static String[] split(String str, char separatorChar) { + return splitWorker(str, separatorChar, false); + } + + /** + * <p>Splits the provided text into an array, separators specified. + * This is an alternative to using StringTokenizer.</p> + * + * <p>The separator is not included in the returned String array. + * Adjacent separators are treated as one separator. + * For more control over the split use the Tokenizer class.</p> + * + * <p>A <code>null</code> input String returns <code>null</code>. 
+ * A <code>null</code> separatorChars splits on whitespace.</p> + * + * <pre> + * StringUtils.split(null, *) = null + * StringUtils.split("", *) = [] + * StringUtils.split("abc def", null) = ["abc", "def"] + * StringUtils.split("abc def", " ") = ["abc", "def"] + * StringUtils.split("abc  def", " ") = ["abc", "def"] + * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"] + * </pre> + * + * @param str the String to parse, may be null + * @param separatorChars the characters used as the delimiters, + * <code>null</code> splits on whitespace + * @return an array of parsed Strings, <code>null</code> if null String input + */ + public static String[] split(String str, String separatorChars) { + return splitWorker(str, separatorChars, -1, false); + } + + /** + * <p>Splits the provided text into an array with a maximum length, + * separators specified.</p> + * + * <p>The separator is not included in the returned String array. + * Adjacent separators are treated as one separator.</p> + * + * <p>A <code>null</code> input String returns <code>null</code>. + * A <code>null</code> separatorChars splits on whitespace.</p> + * + * <p>If more than <code>max</code> delimited substrings are found, the last + * returned string includes all characters after the first <code>max - 1</code> + * returned strings (including separator characters).</p> + * + * <pre> + * StringUtils.split(null, *, *) = null + * StringUtils.split("", *, *) = [] + * StringUtils.split("ab de fg", null, 0) = ["ab", "de", "fg"] + * StringUtils.split("ab   de fg", null, 0) = ["ab", "de", "fg"] + * StringUtils.split("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"] + * StringUtils.split("ab:cd:ef", ":", 2) = ["ab", "cd:ef"] + * </pre> + * + * @param str the String to parse, may be null + * @param separatorChars the characters used as the delimiters, + * <code>null</code> splits on whitespace + * @param max the maximum number of elements to include in the + * array.
A zero or negative value implies no limit + * @return an array of parsed Strings, <code>null</code> if null String input + */ + public static String[] split(String str, String separatorChars, int max) { + return splitWorker(str, separatorChars, max, false); + } + + //----------------------------------------------------------------------- + /** + * <p>Splits the provided text into an array, using whitespace as the + * separator, preserving all tokens, including empty tokens created by + * adjacent separators. This is an alternative to using StringTokenizer. + * Whitespace is defined by {@link Character#isWhitespace(char)}.</p> + * + * <p>The separator is not included in the returned String array. + * Adjacent separators are treated as separators for empty tokens. + * For more control over the split use the Tokenizer class.</p> + * + * <p>A <code>null</code> input String returns <code>null</code>.</p> + * + * <pre> + * StringUtils.splitPreserveAllTokens(null) = null + * StringUtils.splitPreserveAllTokens("") = [] + * StringUtils.splitPreserveAllTokens("abc def") = ["abc", "def"] + * StringUtils.splitPreserveAllTokens("abc  def") = ["abc", "", "def"] + * StringUtils.splitPreserveAllTokens(" abc ") = ["", "abc", ""] + * </pre> + * + * @param str the String to parse, may be <code>null</code> + * @return an array of parsed Strings, <code>null</code> if null String input + * @since 2.1 + */ + public static String[] splitPreserveAllTokens(String str) { + return splitWorker(str, null, -1, true); + } + + /** + * <p>Splits the provided text into an array, separator specified, + * preserving all tokens, including empty tokens created by adjacent + * separators. This is an alternative to using StringTokenizer.</p> + * + * <p>The separator is not included in the returned String array. + * Adjacent separators are treated as separators for empty tokens.
+ * For more control over the split use the Tokenizer class.</p> + * + * <p>A <code>null</code> input String returns <code>null</code>.</p> + * + * <pre> + * StringUtils.splitPreserveAllTokens(null, *) = null + * StringUtils.splitPreserveAllTokens("", *) = [] + * StringUtils.splitPreserveAllTokens("a.b.c", '.') = ["a", "b", "c"] + * StringUtils.splitPreserveAllTokens("a..b.c", '.') = ["a", "", "b", "c"] + * StringUtils.splitPreserveAllTokens("a:b:c", '.') = ["a:b:c"] + * StringUtils.splitPreserveAllTokens("a\tb\nc", null) = ["a", "b", "c"] + * StringUtils.splitPreserveAllTokens("a b c", ' ') = ["a", "b", "c"] + * StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", ""] + * StringUtils.splitPreserveAllTokens("a b c  ", ' ') = ["a", "b", "c", "", ""] + * StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", "a", "b", "c"] + * StringUtils.splitPreserveAllTokens("  a b c", ' ') = ["", "", "a", "b", "c"] + * StringUtils.splitPreserveAllTokens(" a b c ", ' ') = ["", "a", "b", "c", ""] + * </pre> + * + * @param str the String to parse, may be <code>null</code> + * @param separatorChar the character used as the delimiter, + * <code>null</code> splits on whitespace + * @return an array of parsed Strings, <code>null</code> if null String input + * @since 2.1 + */ + public static String[] splitPreserveAllTokens(String str, char separatorChar) { + return splitWorker(str, separatorChar, true); + } + + /** + * Performs the logic for the <code>split</code> and + * <code>splitPreserveAllTokens</code> methods that do not limit the + * length of the returned array. + * + * @param str the String to parse, may be <code>null</code> + * @param separatorChar the separator character + * @param preserveAllTokens if <code>true</code>, adjacent separators are + * treated as empty token separators; if <code>false</code>, adjacent + * separators are treated as one separator.
+ * @return an array of parsed Strings, <code>null</code> if null String input + */ + private static String[] splitWorker(String str, char separatorChar, boolean preserveAllTokens) { // Performance tuned for 2.0 (JDK1.4) if (str == null) { @@ -2006,58 +2150,71 @@ List list = new ArrayList(); int i = 0, start = 0; boolean match = false; + boolean lastMatch = false; while (i < len) { if (str.charAt(i) == separatorChar) { - if (match) { + if (match || preserveAllTokens) { list.add(str.substring(start, i)); match = false; + lastMatch = true; } start = ++i; continue; + } else { + lastMatch = false; } match = true; i++; } - if (match) { + if (match || (preserveAllTokens && lastMatch)) { list.add(str.substring(start, i)); } return (String[]) list.toArray(new String[list.size()]); } /** - * <p>Splits the provided text into an array, separators specified. - * This is an alternative to using StringTokenizer.</p> + * <p>Splits the provided text into an array, separators specified, + * preserving all tokens, including empty tokens created by adjacent + * separators. This is an alternative to using StringTokenizer.</p> * * <p>The separator is not included in the returned String array. - * Adjacent separators are treated as one separator. + * Adjacent separators are treated as separators for empty tokens. * For more control over the split use the Tokenizer class.</p> * * <p>A <code>null</code> input String returns <code>null</code>. 
* A <code>null</code> separatorChars splits on whitespace.</p> * * <pre> - * StringUtils.split(null, *) = null - * StringUtils.split("", *) = [] - * StringUtils.split("abc def", null) = ["abc", "def"] - * StringUtils.split("abc def", " ") = ["abc", "def"] - * StringUtils.split("abc def", " ") = ["abc", "def"] - * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"] + * StringUtils.splitPreserveAllTokens(null, *) = null + * StringUtils.splitPreserveAllTokens("", *) = [] + * StringUtils.splitPreserveAllTokens("abc def", null) = ["abc", "def"] + * StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "def"] + * StringUtils.splitPreserveAllTokens("abc  def", " ") = ["abc", "", "def"] + * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":") = ["ab", "cd", "ef"] + * StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":") = ["ab", "cd", "ef", ""] + * StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""] + * StringUtils.splitPreserveAllTokens("ab::cd:ef", ":") = ["ab", "", "cd", "ef"] + * StringUtils.splitPreserveAllTokens(":cd:ef", ":") = ["", "cd", "ef"] + * StringUtils.splitPreserveAllTokens("::cd:ef", ":") = ["", "", "cd", "ef"] + * StringUtils.splitPreserveAllTokens(":cd:ef:", ":") = ["", "cd", "ef", ""] * </pre> * - * @param str the String to parse, may be null + * @param str the String to parse, may be <code>null</code> * @param separatorChars the characters used as the delimiters, * <code>null</code> splits on whitespace * @return an array of parsed Strings, <code>null</code> if null String input */ - public static String[] split(String str, String separatorChars) { - return split(str, separatorChars, -1); + public static String[] splitPreserveAllTokens(String str, String separatorChars) { + return splitWorker(str, separatorChars, -1, true); } /** * <p>Splits the provided text into an array with a maximum length, - * separators specified.</p> + * separators specified, preserving all tokens, including empty tokens + * created by adjacent
separators.</p> * * <p>The separator is not included in the returned String array. + * Adjacent separators are treated as separators for empty tokens. * Adjacent separators are treated as one separator.</p> * * <p>A <code>null</code> input String returns <code>null</code>. @@ -2068,22 +2225,43 @@ * returned strings (including separator characters).</p> * * <pre> - * StringUtils.split(null, *, *) = null - * StringUtils.split("", *, *) = [] - * StringUtils.split("ab de fg", null, 0) = ["ab", "cd", "ef"] - * StringUtils.split("ab de fg", null, 0) = ["ab", "cd", "ef"] - * StringUtils.split("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"] - * StringUtils.split("ab:cd:ef", ":", 2) = ["ab", "cd:ef"] + * StringUtils.splitPreserveAllTokens(null, *, *) = null + * StringUtils.splitPreserveAllTokens("", *, *) = [] + * StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "de", "fg"] + * StringUtils.splitPreserveAllTokens("ab   de fg", null, 0) = ["ab", "", "", "de", "fg"] + * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"] + * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2) = ["ab", "cd:ef"] + * StringUtils.splitPreserveAllTokens("ab   de fg", null, 2) = ["ab", "  de fg"] + * StringUtils.splitPreserveAllTokens("ab   de fg", null, 3) = ["ab", "", " de fg"] + * StringUtils.splitPreserveAllTokens("ab   de fg", null, 4) = ["ab", "", "", "de fg"] * </pre> * - * @param str the String to parse, may be null + * @param str the String to parse, may be <code>null</code> * @param separatorChars the characters used as the delimiters, * <code>null</code> splits on whitespace * @param max the maximum number of elements to include in the * array.
A zero or negative value implies no limit * @return an array of parsed Strings, <code>null</code> if null String input */ - public static String[] split(String str, String separatorChars, int max) { + public static String[] splitPreserveAllTokens(String str, String separatorChars, int max) { + return splitWorker(str, separatorChars, max, true); + } + + /** + * Performs the logic for the <code>split</code> and + * <code>splitPreserveAllTokens</code> methods that limit the returned + * array to a maximum length. + * + * @param str the String to parse, may be <code>null</code> + * @param separatorChars the separator characters + * @param max the maximum number of elements to include in the + * array. A zero or negative value implies no limit. + * @param preserveAllTokens if <code>true</code>, adjacent separators are + * treated as empty token separators; if <code>false</code>, adjacent + * separators are treated as one separator. + * @return an array of parsed Strings, <code>null</code> if null String input + */ + private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) { // Performance tuned for 2.0 (JDK1.4) // Direct code is quicker than StringTokenizer.
// Also, StringTokenizer uses isSpace() not isWhitespace() @@ -2099,19 +2277,24 @@ int sizePlus1 = 1; int i = 0, start = 0; boolean match = false; + boolean lastMatch = false; if (separatorChars == null) { // Null separator means use whitespace while (i < len) { if (Character.isWhitespace(str.charAt(i))) { - if (match) { + if (match || preserveAllTokens) { + lastMatch = true; if (sizePlus1++ == max) { i = len; + lastMatch = false; } list.add(str.substring(start, i)); match = false; } start = ++i; continue; + } else { + lastMatch = false; } match = true; i++; @@ -2121,15 +2304,19 @@ char sep = separatorChars.charAt(0); while (i < len) { if (str.charAt(i) == sep) { - if (match) { + if (match || preserveAllTokens) { + lastMatch = true; if (sizePlus1++ == max) { i = len; + lastMatch = false; } list.add(str.substring(start, i)); match = false; } start = ++i; continue; + } else { + lastMatch = false; } match = true; i++; @@ -2138,21 +2325,25 @@ // standard case while (i < len) { if (separatorChars.indexOf(str.charAt(i)) >= 0) { - if (match) { + if (match || preserveAllTokens) { + lastMatch = true; if (sizePlus1++ == max) { i = len; + lastMatch = false; } list.add(str.substring(start, i)); match = false; } start = ++i; continue; + } else { + lastMatch = false; } match = true; i++; } } - if (match) { + if (match || (preserveAllTokens && lastMatch)) { list.add(str.substring(start, i)); } return (String[]) list.toArray(new String[list.size()]);
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]