[ https://issues.apache.org/jira/browse/TEXT-98?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16098884#comment-16098884 ]
ASF GitHub Bot commented on TEXT-98: ------------------------------------ Github user ameyjadiye commented on a diff in the pull request: https://github.com/apache/commons-text/pull/57#discussion_r129102692 --- Diff: src/main/java/org/apache/commons/text/WordUtils.java --- @@ -747,45 +750,29 @@ public static boolean containsAllWords(final CharSequence word, final CharSequen return true; } - //----------------------------------------------------------------------- + // ----------------------------------------------------------------------- /** - * Is the character a delimiter. + * <p> + * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default + * value if delimiters is null. The generated hash set provides O(1) lookup time. + * </p> * - * @param ch the character to check - * @param delimiters the delimiters - * @return true if it is a delimiter + * @param delimiters set of characters to determine capitalization, null means whitespace + * @return Set<Integer> */ - public static boolean isDelimiter(final char ch, final char[] delimiters) { - if (delimiters == null) { - return Character.isWhitespace(ch); - } - for (final char delimiter : delimiters) { - if (ch == delimiter) { - return true; + private static Set<Integer> generateDelimiterSet(final char[] delimiters) { + Set<Integer> delimiterHashSet = new HashSet<>(); + if (delimiters == null || delimiters.length == 0) { + if (delimiters == null) { + delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 0)); } + return delimiterHashSet; } - return false; - } - //----------------------------------------------------------------------- - /** - * Is the codePoint a delimiter. - * - * @param codePoint the codePint to check - * @param delimiters the delimiters - * @return true if it is a delimiter - */ - public static boolean isDelimiter(final int codePoint, final char[] delimiters) { --- End diff -- Rather than removing it, we should keep this method. 
> Remove isDelimiter() and use HashSets for delimiter check > --------------------------------------------------------- > > Key: TEXT-98 > URL: https://issues.apache.org/jira/browse/TEXT-98 > Project: Commons Text > Issue Type: Improvement > Affects Versions: 1.1 > Reporter: Arun Vinud > Priority: Minor > Fix For: 1.2 > > > The current implementation of *capitalize*, *uncapitalize* and *initials* in > *WordUtils* calls *isDelimiter* for every character and/or code point, and > *isDelimiter* loops through the array of delimiters to check for a > match. This is inefficient: it results in O(nk) complexity (n = input length, > k = number of delimiters), which can be reduced to O(n) [if n>k] or O(k) [if k>n]. -- This message was sent by Atlassian JIRA (v6.4.14#64029)