zhengruifeng commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1119610173
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -1399,6 +1399,145 @@ case class ArrayContains(left: Expression, right: Expression)
     copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage =

Review Comment:
   please also document the null handling like `ArrayAppend` does:
   ```
   Type of element should be similar to type of the elements of the array.
   Null element is also appended into the array. But if the array passed is NULL, output is NULL.
   ```
   examples:
   ```
   > SELECT _FUNC_(array(1, 2, 3, null), null);
    [null,1,2,3,null]
   > SELECT _FUNC_(CAST(null as Array<Int>), 2);
    NULL
   ```


##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
     return _invoke_function_over_columns("get", col, index)
 
 
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:
+    """
+    Collection function: Returns an array containing element as
+    well as all elements from array. The new element is positioned
+    at the beginning of the array.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        name of column containing array
+    element :
+        element to be prepended to the array
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        an array with the given value prepended.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([([2, 3, 4],), ([],)], ['data'])
+    >>> df.select(array_prepend(df.data, 1)).collect()
+    [Row(array_prepend(data, 1)=[1, 2, 3, 4]), Row(array_prepend(data, 1)=[1])]
+    """
+    return _invoke_function("array_prepend", _to_java_column(col), element)

Review Comment:
   ```suggestion
       return _invoke_function_over_columns("array_prepend", col, lit(value))
   ```


##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
     return _invoke_function_over_columns("get", col, index)
 
 
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:
+    """
+    Collection function: Returns an array containing element as
+    well as all elements from array. The new element is positioned
+    at the beginning of the array.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        name of column containing array
+    element :
+        element to be prepended to the array

Review Comment:
   ```suggestion
       value :
           a literal value, or a :class:`~pyspark.sql.Column` expression.
   ```


##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
     return _invoke_function_over_columns("get", col, index)
 
 
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:

Review Comment:
   ```suggestion
   def array_prepend(col: "ColumnOrName", value: Any) -> Column:
   ```
   to be consistent with `array_append`


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -1399,6 +1399,145 @@ case class ArrayContains(left: Expression, right: Expression)
     copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage =
+    "_FUNC_(array, value) - Returns an array containing value as well as all elements from array. The new element is positioned at the beginning of the array.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+       ["d","b","d","c","a"]
+  """,
+  group = "array_funcs",
+  since = "3.4.0")
+case class ArrayPrepend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+
+  override def nullable: Boolean = left.nullable
+
+  @transient protected lazy val elementType: DataType =
+    inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def eval(input: InternalRow): Any = {
+    val value1 = left.eval(input)
+    if (value1 == null) {
+      null
+    } else {
+      val value2 = right.eval(input)
+      nullSafeEval(value1, value2)
+    }
+  }
+  override def nullSafeEval(arr: Any, elementData: Any): Any = {
+    val arrayData = arr.asInstanceOf[ArrayData]
+    val numberOfElements = arrayData.numElements() + 1
+    if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+      throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+    }
+    val finalData = new Array[Any](numberOfElements)
+    finalData.update(0, elementData)
+    arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1, v))
+    new GenericArrayData(finalData)
+  }
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val leftGen = left.genCode(ctx)
+    val rightGen = right.genCode(ctx)
+    val f = (arr: String, value: String) => {
+      val newArraySize = ctx.freshName("newArraySize")
+      val newArray = ctx.freshName("newArray")
+      val i = ctx.freshName("i")
+      val pos = ctx.freshName("pos")

Review Comment:
   I guess we only need one of the vars `i` and `pos`?
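To make that suggestion concrete, here is a minimal sketch of what the `f` closure inside `doGenCode` could look like with the two cursors merged into one. It relies on the enclosing class for `ctx`, `ev`, `elementType`, `left`, and `prettyName`, and on the existing `CodeGenerator` helpers (`createArrayData`, `createArrayAssignment`, `setArrayElement`); it is an illustration of the idea, not the PR's actual code, and it elides the null check for the prepended value:

```scala
// Hypothetical single-cursor version of the codegen body (sketch only).
// Slot 0 receives the prepended value; source element i is copied into
// slot i + 1, so the write position is always the read position plus one
// and a second counter (`pos`) can never diverge from `i`.
val f = (arr: String, value: String) => {
  val newArraySize = ctx.freshName("newArraySize")
  val newArray = ctx.freshName("newArray")
  val i = ctx.freshName("i")
  // Allocate the destination ArrayData of size numElements() + 1.
  val allocation = CodeGenerator.createArrayData(
    newArray, elementType, newArraySize, s" $prettyName failed.")
  // Copy source element i into slot i + 1, conservatively null-checking.
  val assignment = CodeGenerator.createArrayAssignment(
    newArray, elementType, arr, s"$i + 1", i, true)
  s"""
     |int $newArraySize = $arr.numElements() + 1;
     |$allocation
     |${CodeGenerator.setArrayElement(newArray, elementType, "0", value)}
     |for (int $i = 0; $i < $arr.numElements(); $i++) {
     |  $assignment
     |}
     |${ev.value} = $newArray;
   """.stripMargin
}
```

Whether the surviving variable is named `i` or `pos` is cosmetic; the point is that one counter suffices because the destination index is always derivable from the source index.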
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org