zhengruifeng commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1119610173
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -1399,6 +1399,145 @@ case class ArrayContains(left: Expression, right: Expression)
     copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage =

Review Comment:
   please also document the null handling like `ArrayAppend` does:
   ```
   Type of element should be similar to type of the elements of the array.
   Null element is also appended into the array. But if the array passed is NULL, output is NULL.
   ```
   examples:
   ```
   > SELECT _FUNC_(array(1, 2, 3, null), null);
    [null,1,2,3,null]
   > SELECT _FUNC_(CAST(null as Array<Int>), 2);
    NULL
   ```


##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
     return _invoke_function_over_columns("get", col, index)
 
 
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:
+    """
+    Collection function: Returns an array containing element as
+    well as all elements from array. The new element is positioned
+    at the beginning of the array.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        name of column containing array
+    element :
+        element to be prepended to the array
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        an array with the given value prepended.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([([2, 3, 4],), ([],)], ['data'])
+    >>> df.select(array_prepend(df.data, 1)).collect()
+    [Row(array_prepend(data, 1)=[1, 2, 3, 4]), Row(array_prepend(data, 1)=[1])]
+    """
+    return _invoke_function("array_prepend", _to_java_column(col), element)

Review Comment:
   ```suggestion
       return _invoke_function_over_columns("array_prepend", col, lit(value))
   ```


##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
     return _invoke_function_over_columns("get", col, index)
 
 
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:
+    """
+    Collection function: Returns an array containing element as
+    well as all elements from array. The new element is positioned
+    at the beginning of the array.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        name of column containing array
+    element :
+        element to be prepended to the array

Review Comment:
   ```suggestion
       value :
           a literal value, or a :class:`~pyspark.sql.Column` expression.
   ```


##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
     return _invoke_function_over_columns("get", col, index)
 
 
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:

Review Comment:
   ```suggestion
   def array_prepend(col: "ColumnOrName", value: Any) -> Column:
   ```
   to be consistent with `array_append`


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -1399,6 +1399,145 @@ case class ArrayContains(left: Expression, right: Expression)
     copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage =
+    "_FUNC_(array, value) - Returns an array containing value as well as all elements from array. The new element is positioned at the beginning of the array.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+       ["d","b","d","c","a"]
+  """,
+  group = "array_funcs",
+  since = "3.4.0")
+case class ArrayPrepend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+
+  override def nullable: Boolean = left.nullable
+
+  @transient protected lazy val elementType: DataType =
+    inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def eval(input: InternalRow): Any = {
+    val value1 = left.eval(input)
+    if (value1 == null) {
+      null
+    } else {
+      val value2 = right.eval(input)
+      nullSafeEval(value1, value2)
+    }
+  }
+  override def nullSafeEval(arr: Any, elementData: Any): Any = {
+    val arrayData = arr.asInstanceOf[ArrayData]
+    val numberOfElements = arrayData.numElements() + 1
+    if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+      throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+    }
+    val finalData = new Array[Any](numberOfElements)
+    finalData.update(0, elementData)
+    arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1, v))
+    new GenericArrayData(finalData)
+  }
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val leftGen = left.genCode(ctx)
+    val rightGen = right.genCode(ctx)
+    val f = (arr: String, value: String) => {
+      val newArraySize = ctx.freshName("newArraySize")
+      val newArray = ctx.freshName("newArray")
+      val i = ctx.freshName("i")
+      val pos = ctx.freshName("pos")

Review Comment:
   I guess we only need one of the vars `i` and `pos`?
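To make that suggestion concrete, here is a minimal sketch of what the `f` closure inside `doGenCode` could look like with the two cursors merged into one. It relies on the enclosing class for `ctx`, `ev`, `elementType`, `left`, and `prettyName`, and on the existing `CodeGenerator` helpers (`createArrayData`, `createArrayAssignment`, `setArrayElement`); it is an illustration of the idea, not the PR's actual code, and it elides the null check for the prepended value:

```scala
// Hypothetical single-cursor version of the codegen body (sketch only).
// Slot 0 receives the prepended value; source element i is copied into
// slot i + 1, so the write position is always the read position plus one
// and a second counter (`pos`) can never diverge from `i`.
val f = (arr: String, value: String) => {
  val newArraySize = ctx.freshName("newArraySize")
  val newArray = ctx.freshName("newArray")
  val i = ctx.freshName("i")
  // Allocate the destination ArrayData of size numElements() + 1.
  val allocation = CodeGenerator.createArrayData(
    newArray, elementType, newArraySize, s" $prettyName failed.")
  // Copy source element i into slot i + 1, conservatively null-checking.
  val assignment = CodeGenerator.createArrayAssignment(
    newArray, elementType, arr, s"$i + 1", i, true)
  s"""
     |int $newArraySize = $arr.numElements() + 1;
     |$allocation
     |${CodeGenerator.setArrayElement(newArray, elementType, "0", value)}
     |for (int $i = 0; $i < $arr.numElements(); $i++) {
     |  $assignment
     |}
     |${ev.value} = $newArray;
   """.stripMargin
}
```

Whether the surviving variable is named `i` or `pos` is cosmetic; the point is that one counter suffices because the destination index is always derivable from the source index.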
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org