uros-db commented on code in PR #45978: URL: https://github.com/apache/spark/pull/45978#discussion_r1560453598
########## common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java: ########## @@ -172,19 +183,31 @@ public Collation( } /** - * Auxiliary methods for collation aware string operations. + * Returns a StringSearch object for the given pattern and target strings, under collation + * rules corresponding to the given collationId. The external ICU library StringSearch object can + * be used to find occurrences of the pattern in the target string, while respecting collation. */ - public static StringSearch getStringSearch( - final UTF8String left, - final UTF8String right, + final UTF8String targetUTF8String, + final UTF8String patternUTF8String, final int collationId) { - String pattern = right.toString(); - CharacterIterator target = new StringCharacterIterator(left.toString()); + String pattern = patternUTF8String.toString(); + CharacterIterator target = new StringCharacterIterator(targetUTF8String.toString()); Collator collator = CollationFactory.fetchCollation(collationId).collator; return new StringSearch(pattern, target, (RuleBasedCollator) collator); } + /** + * Returns a collation-unaware StringSearch object for the given pattern and target strings. + * While this object does not respect collation, it can be used to find occurrences of the pattern + * in the target string for UTF8_BINARY or UTF8_BINARY_LCASE (if arguments are lowercased). + */ + public static StringSearch getStringSearch( Review Comment: It will be needed for custom UTF8_BINARY_LCASE implementations of certain expressions (for example: StringReplace, StringTranslate). Since UTF8_BINARY_LCASE doesn't have an ICU collator instance, we can only call `.toLowerCase` on both arguments and then use this raw (binary) `StringSearch` instance to implement those expressions. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org