Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22227#discussion_r212783481
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
 ---
    @@ -232,30 +232,41 @@ case class RLike(left: Expression, right: Expression) 
extends StringRegexExpress
      * Splits str around pat (pattern is a regular expression).
      */
     @ExpressionDescription(
    -  usage = "_FUNC_(str, regex) - Splits `str` around occurrences that match 
`regex`.",
    +  usage = "_FUNC_(str, regex, limit) - Splits `str` around occurrences 
that match `regex`." +
    +    "The `limit` parameter controls the number of times the pattern is 
applied and " +
    +    "therefore affects the length of the resulting array. If the limit n 
is " +
    +    "greater than zero then the pattern will be applied at most n - 1 
times, " +
    +    "the array's length will be no greater than n, and the array's last 
entry " +
    +    "will contain all input beyond the last matched delimiter. If n is " +
    +    "non-positive then the pattern will be applied as many times as " +
    +    "possible and the array can have any length. If n is zero then the " +
    +    "pattern will be applied as many times as possible, the array can " +
    +    "have any length, and trailing empty strings will be discarded.",
       examples = """
         Examples:
    -      > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]');
    +      > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]', -1);
            ["one","two","three",""]
    +|      > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]', 2);
    + |       ["one","twoBthreeC"]
       """)
    -case class StringSplit(str: Expression, pattern: Expression)
    -  extends BinaryExpression with ImplicitCastInputTypes {
    +case class StringSplit(str: Expression, pattern: Expression, limit: 
Expression)
    +  extends TernaryExpression with ImplicitCastInputTypes {
     
    -  override def left: Expression = str
    -  override def right: Expression = pattern
       override def dataType: DataType = ArrayType(StringType)
    -  override def inputTypes: Seq[DataType] = Seq(StringType, StringType)
    +  override def inputTypes: Seq[DataType] = Seq(StringType, StringType, 
IntegerType)
    +  override def children: Seq[Expression] = str :: pattern :: limit :: Nil
     
    -  override def nullSafeEval(string: Any, regex: Any): Any = {
    -    val strings = 
string.asInstanceOf[UTF8String].split(regex.asInstanceOf[UTF8String], -1)
    +  override def nullSafeEval(string: Any, regex: Any, limit: Any): Any = {
    --- End diff --
    
    I think we still need to validate `limit`. According to the Presto 
documentation, `limit` must be a positive number. -1 is only used as the 
default value when no `limit` parameter is given.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to