[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2023-01-03 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1061149004


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4630,4 +4630,144 @@ case class ArrayCompact(child: Expression)
 copy(child = newChild)
 }
 
+/**

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2023-01-03 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1061145796


##
sql/core/src/test/resources/sql-tests/inputs/array.sql:
##
@@ -137,3 +137,11 @@ select array_compact(array("a", "b", "c"));
 select array_compact(array(1D, null, 2D, null));
 select array_compact(array(array(1, 2, 3, null), null, array(4, null, 6)));
 select array_compact(array(null));
+
+-- function array_append

Review Comment:
   Added the tests, but had to add a type cast for the empty array in the second one; otherwise it was returning null:
   select array_append(CAST(array() AS ARRAY<String>), CAST(NULL AS String));
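
   For reference, a minimal spark-shell sketch of the statement above (the ARRAY<String> element type is reconstructed from context; the behaviour is as reported in this comment, not re-verified):

   // Pinning the element type of the empty array; per the comment above, the
   // uncast variant array_append(array(), NULL) was returning NULL at this stage.
   spark.sql("select array_append(CAST(array() AS ARRAY<String>), CAST(NULL AS String))")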



##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4630,4 +4630,144 @@ case class ArrayCompact(child: Expression)
 copy(child = newChild)
 }
 
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = """
+  _FUNC_(array, element) - Add the element at the end of the array passed as the first
+  argument. The type of the element should be similar to the type of the elements of the array.
+  """,
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes
+with ComplexTypeMergingExpression
+with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  @transient protected lazy val elementType: DataType =
+inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+messageParameters = Map(
+  "functionName" -> toSQLId(prettyName),
+  "leftType" -> toSQLType(left.dataType),
+  "rightType" -> toSQLType(right.dataType),
+  "dataType" -> toSQLType(ArrayType)
+))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+if (value1 == null) {
+  null
+} else {
+  val value2 = right.eval(input)
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override protected def nullSafeEval(arr: Any, elementData: Any): Any = {
+val arrayData = arr.asInstanceOf[ArrayData]
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(elementType, finalData.update)
+finalData.update(arrayData.numElements(), elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def nullable: Boolean = left.nullable
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)
+val f = (eval1: String, eval2: String) => {
+  val newArraySize = ctx.freshName("newArraySize")
+  val i = ctx.freshName("i")
+  val values = ctx.freshName("values")
+  val allocation = CodeGenerator.createArrayData(
+values, elementType, newArraySize, s" $prettyName failed.")
+  val assignment = CodeGenerator.createArrayAssignment(
+    values, elementType, eval1, i, i, left.dataType.asInstanceOf[ArrayType].containsNull)
+  s"""
+ |int $newArraySize = $eval1.numElements() + 1;
+ |$allocation
+ |int $i = 0;
+ |while ($i < $eval1.numElements()) {
+ |  $assignment
+ |  $i ++;
+ |}
+ |${CodeGenerator.setArrayElement(values, elementType, i, eval2, Some(rightGen.isNull))}
+ |${ev.value} = $values;
+ |""".stripMargin
+}
+val resultCode = f(leftGen.value, rightGen.value)
+if (nullable) {
+  val nullSafeEval =
+leftGen.code + rightGen.code + ctx.nullSafeExec(left.nullable, 

[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-29 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1059280513


##
sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out:
##
@@ -489,3 +489,43 @@ select get(array(1, 2, 3), -1)
 struct<get(array(1, 2, 3), -1):int>
 -- !query output
 NULL
+
+
+-- !query
+select array_append(array(1, 2, 3), 4)
+-- !query schema
+struct<array_append(array(1, 2, 3), 4):array<int>>
+-- !query output
+[1,2,3,4]
+
+
+-- !query
+select array_append(array('a', 'b', 'c'), 'd')
+-- !query schema
+struct<array_append(array(a, b, c), d):array<string>>
+-- !query output
+["a","b","c","d"]
+
+
+-- !query
+select array_append(array(1, 2, 3), CAST(null AS INT))
+-- !query schema
+struct<array_append(array(1, 2, 3), CAST(NULL AS INT)):array<int>>
+-- !query output
+NULL

Review Comment:
   Yes! Changed the output as well as the SQL.
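
   For readers following the thread: the semantics the PR converged on, and what ships in Spark 3.4, append a null element as data, while a null array argument still yields NULL. A quick sketch, assuming a Spark >= 3.4 spark-shell session:

   spark.sql("select array_append(array(1, 2, 3), CAST(null AS INT))")  // [1,2,3,null]
   spark.sql("select array_append(CAST(null AS ARRAY<INT>), 4)")        // NULL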






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-29 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1059280320


##
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala:
##
@@ -2596,4 +2596,113 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
   Date.valueOf("2017-02-12")))
 }
   }
+
+  test("ArrayAppend Expression Test") {
+checkEvaluation(
+  ArrayAppend(
+Literal.create(null, ArrayType(StringType)),
+Literal.create("c", StringType)),
+  null)
+
+checkEvaluation(
+  ArrayAppend(
+Literal.create(null, ArrayType(StringType)),
+Literal.create(null, StringType)),
+  null)
+
+checkEvaluation(
+  ArrayAppend(
+Literal.create(Seq(""), ArrayType(StringType)),
+Literal.create(null, StringType)),
+  null)
+
+checkEvaluation(
+  ArrayAppend(
+Literal.create(Seq(Double.NaN, 1d, 2d), ArrayType(DoubleType)),
+Literal.create(3d, DoubleType)),
+  Seq(Double.NaN, 1d, 2d, 3d))
+// Null entry check
+checkEvaluation(
+  ArrayAppend(
+Literal.create(Seq(null, 1d, 2d), ArrayType(DoubleType)),
+Literal.create(3d, DoubleType)),
+  Seq(null, 1d, 2d, 3d))
+checkEvaluation(
+  ArrayAppend(
+Literal.create(
+  Seq(Date.valueOf("2017-04-06"), Date.valueOf("2017-03-23"), Date.valueOf("2017-03-10")),

Review Comment:
   Oh, OK! Let me delete these test cases.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-29 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1059280201


##
python/pyspark/sql/connect/functions.py:
##
@@ -1098,6 +1098,13 @@ def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
 array_intersect.__doc__ = pysparkfuncs.array_intersect.__doc__
 
 
+def array_append(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:

Review Comment:
   Oh, sure! Let me remove the changes made for connect.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-26 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1057236944


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,111 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = """_FUNC_(array, element) - Add the element at the end of the array passed as the first
+  argument. The type of the element should be similar to the type of the elements of the array.""",

Review Comment:
   Done






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-26 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1057236664


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,111 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = """_FUNC_(array, element) - Add the element at the end of the array passed as the first
+  argument. The type of the element should be similar to the type of the elements of the array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+

Review Comment:
   Removed






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-26 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1057197385


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   @zhengruifeng Thanks for flagging this! Let me make the changes accordingly.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-26 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1057196837


##
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala:
##
@@ -2596,4 +2596,113 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
   Date.valueOf("2017-02-12")))
 }
   }
+
+  test("ArrayAppend Expression Test") {
+checkEvaluation(
+  ArrayAppend(
+Literal.create(null, ArrayType(StringType)),
+Literal.create("c", StringType)),
+  null)
+
+checkEvaluation(
+  ArrayAppend(
+Literal.create(null, ArrayType(StringType)),
+Literal.create(null, StringType)),
+  null)
+
+checkEvaluation(
+  ArrayAppend(
+Literal.create(Seq(""), ArrayType(StringType)),
+Literal.create(null, StringType)),
+  null)
+
+checkEvaluation(
+  ArrayAppend(
+Literal.create(Seq(Double.NaN, 1d, 2d), ArrayType(DoubleType)),
+Literal.create(3d, DoubleType)),
+  Seq(Double.NaN, 1d, 2d, 3d))
+// Null entry check
+checkEvaluation(
+  ArrayAppend(
+Literal.create(Seq(null, 1d, 2d), ArrayType(DoubleType)),
+Literal.create(3d, DoubleType)),
+  Seq(null, 1d, 2d, 3d))
+checkEvaluation(
+  ArrayAppend(
+Literal.create(
+  Seq(Date.valueOf("2017-04-06"), Date.valueOf("2017-03-23"), Date.valueOf("2017-03-10")),

Review Comment:
   Generally testing the date scenario, nothing specific.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-16 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1050467934


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,111 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = """_FUNC_(array, element) - Add the element at the end of the array passed as the first
+  argument. The type of the element should be similar to the type of the elements of the array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes
+with ComplexTypeMergingExpression
+with NullIntolerant
+with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  @transient protected lazy val elementType: DataType =
+inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+messageParameters = Map(
+  "functionName" -> toSQLId(prettyName),
+  "leftType" -> toSQLType(left.dataType),
+  "rightType" -> toSQLType(right.dataType),
+  "dataType" -> toSQLType(ArrayType)
+))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(elementType, finalData.update)
+finalData.update(arrayData.numElements(), elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(
+  ctx, ev, (eval1, eval2) => {
+val newArraySize = ctx.freshName("newArraySize")
+val i = ctx.freshName("i")
+val values = ctx.freshName("values")
+val allocation = CodeGenerator.createArrayData(
+  values, elementType, newArraySize, s" $prettyName failed.")
+val assignment = CodeGenerator.createArrayAssignment(
+  values, elementType, eval1, i, i, true)
+s"""
+   |int $newArraySize = $eval1.numElements() + 1;
+   |$allocation
+   |int $i = 0;
+   |for (; $i < $eval1.numElements(); $i ++) {

Review Comment:
   Yes! That makes more sense. Let me update it real quick.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-16 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1050467116


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,111 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = """_FUNC_(array, element) - Add the element at the end of the array passed as the first
+  argument. The type of the element should be similar to the type of the elements of the array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes
+with ComplexTypeMergingExpression
+with NullIntolerant
+with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  @transient protected lazy val elementType: DataType =
+inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+messageParameters = Map(
+  "functionName" -> toSQLId(prettyName),
+  "leftType" -> toSQLType(left.dataType),
+  "rightType" -> toSQLType(right.dataType),
+  "dataType" -> toSQLType(ArrayType)
+))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val elementData = input2

Review Comment:
   Yes! It's only to make it more readable; sure, I can change the argument name real quick.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-14 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1048177477


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,135 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = """_FUNC_(array, element) - Add the element at the end of the array passed as the first
+  argument. The type of the element should be similar to the type of the elements of the array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends ArrayAddElement {
+  override def prettyName: String = "array_append"
+  override val prependFlag: Boolean = false
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+}
+
+trait ArrayAddElement

Review Comment:
   Never mind, removed it! Please review it again. Thanks!






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-14 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1048163037


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,135 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = """_FUNC_(array, element) - Add the element at the end of the array passed as the first
+  argument. The type of the element should be similar to the type of the elements of the array.""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends ArrayAddElement {
+  override def prettyName: String = "array_append"
+  override val prependFlag: Boolean = false
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+}
+
+trait ArrayAddElement

Review Comment:
   Oh, OK! I thought it would be easier to make the change later if we keep the abstraction in place already. Should I still remove it?






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-09 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1044328932


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,92 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(
+  ctx, ev, (left: String, right: String) => {
+val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"

Review Comment:
   @beliefer @LuciferYang I am thinking ArrayAppend and ArrayPrepend can probably be handled by the same piece of code, since most of the checks and processing are going to be the same. I can create a trait that holds everything, controlled by a boolean flag (see the sketch below). Let me update the code real quick; I think it will make both functions cleaner.
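
   A toy, Catalyst-free sketch of the shared-trait idea (the names ArrayAddElement and prependFlag follow this discussion; the Seq-based evaluation is illustrative only, not the real ArrayData code path):

   // One trait carries the shared logic; a boolean flag picks append vs. prepend.
   trait ArrayAddElement {
     def prependFlag: Boolean
     def addElement(arr: Seq[Any], elem: Any): Seq[Any] =
       if (arr == null) null
       else if (prependFlag) elem +: arr
       else arr :+ elem
   }
   object ToyArrayAppend extends ArrayAddElement { val prependFlag = false }
   object ToyArrayPrepend extends ArrayAddElement { val prependFlag = true }
   // ToyArrayAppend.addElement(Seq(1, 2, 3), 4)  == Seq(1, 2, 3, 4)
   // ToyArrayPrepend.addElement(Seq(1, 2, 3), 0) == Seq(0, 1, 2, 3)

   (As later comments in this thread show, this abstraction was ultimately dropped from the PR.)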






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-09 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1044277425


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,92 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(
+  ctx, ev, (left: String, right: String) => {
+val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"

Review Comment:
   @beliefer Updated the code to generate the codegen for this manually.
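
   A toy illustration of what "generating the codegen manually" amounts to (plain Scala string-building that mimics the shape of the Java source a hand-written doGenCode emits; the variable names are made up):

   // Build a Java snippet that sizes a new array, copies the input, and writes
   // the appended element into the last slot.
   def genAppendSnippet(arrVar: String, elemVar: String, resVar: String): String =
     s"""
        |int n = $arrVar.numElements() + 1;
        |// ... allocate $resVar and copy the $arrVar elements in a loop ...
        |$resVar[n - 1] = $elemVar;
        |""".stripMargin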






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1044143098


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,92 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(
+  ctx, ev, (left: String, right: String) => {
+val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"

Review Comment:
   Thanks for putting these points forward, @beliefer! Let me work on it.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043270507


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,92 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(
+  ctx, ev, (left: String, right: String) => {
+val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"

Review Comment:
   Oh, is it? It is actually a similar piece of code; I used it since I was overriding the behaviour we had for checking null entries. But now that we are effectively NullIntolerant, this is actually okay, since by default it puts the null checks in cleanly. I'm not sure we gain anything specific, though; please help list that out. Thanks!
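
   For readers untangling the two behaviours in this thread, a toy sketch (plain Scala, no Catalyst) of the difference between the fully null-intolerant semantics described here and the array-only short-circuit the PR later moved to:

   // NullIntolerant-style: any null input yields null (the revision discussed here).
   def appendNullIntolerant(arr: Seq[Any], elem: Any): Seq[Any] =
     if (arr == null || elem == null) null else arr :+ elem

   // Array-only short-circuit (the later revisions): a null array yields null,
   // but a null element is appended as data.
   def appendNullArrayOnly(arr: Seq[Any], elem: Any): Seq[Any] =
     if (arr == null) null else arr :+ elem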






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043117095


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
    newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array and another element, append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   @beliefer Please refer to this thread. :) 






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043116258


##
sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out:
##
@@ -489,3 +489,43 @@ select get(array(1, 2, 3), -1)
 struct<get(array(1, 2, 3), -1):int>
 -- !query output
 NULL
+
+
+-- !query
+select array_append(array(1, 2, 3), 4)
+-- !query schema
+struct<array_append(array(1, 2, 3), 4):array<int>>
+-- !query output
+[1,2,3,4]
+
+
+-- !query
+select array_append(array('a', 'b', 'c'), 'd')
+-- !query schema
+struct<array_append(array(a, b, c), d):array<string>>
+-- !query output
+["a","b","c","d"]
+
+
+-- !query
+select array_append(array(1, 2, 3), CAST(null AS INT))
+-- !query schema
+struct<array_append(array(1, 2, 3), CAST(NULL AS INT)):array<int>>
+-- !query output
+NULL

Review Comment:
   If any of the inputs is null, we return null.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043113737


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,92 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) if e1.sameType(e2) => 
TypeCheckResult.TypeCheckSuccess
+  case (ArrayType(e1, _), e2) => DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(
+  ctx, ev, (left: String, right: String) => {
+val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"

Review Comment:
   I was doing that earlier, when I had to override the behaviour of the null
checks. But now that we are not overriding the null safety in the codegen, I
thought of reusing the same nullSafeCodeGen.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043111724


##
sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out:
##
@@ -489,3 +489,43 @@ select get(array(1, 2, 3), -1)
 struct<get(array(1, 2, 3), -1):int>
 -- !query output
 NULL
+
+
+-- !query
+select array_append(array(1, 2, 3), 4)
+-- !query schema
+struct<array_append(array(1, 2, 3), 4):array<int>>
+-- !query output
+[1,2,3,4]
+
+
+-- !query
+select array_append(array('a', 'b', 'c'), 'd')
+-- !query schema
+struct<array_append(array(a, b, c), d):array<string>>
+-- !query output
+["a","b","c","d"]
+
+
+-- !query
+select array_append(array(1, 2, 3), CAST(null AS INT))
+-- !query schema
+struct<array_append(array(1, 2, 3), CAST(NULL AS INT)):array<int>>
+-- !query output
+NULL

Review Comment:
   This is more consistent with the other higher-order functions we already have.
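
   For comparison (hedged, from memory of the existing built-ins; not from this
   PR's tests), the other array functions behave the same way:
   ```
   // array_position is likewise NULL-intolerant: a NULL array yields NULL.
   spark.sql("SELECT array_position(CAST(NULL AS ARRAY<INT>), 1)").collect()
   // -> Array(Row(null))
   ```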



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043086166


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043043495


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   I think it's okay if we are not consistent with the behaviour Snowflake has,
as long as we are consistent with the existing higher-order functions in Spark
itself. That way we won't need to override the eval function or write the
codegen function from scratch; we can just use nullSafeCodeGen and call the
function we need.
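
   Concretely, the simplification would look roughly like this (a sketch of the
   shape already shown in the revised diff above, with the default null
   handling kept):
   ```
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     // nullSafeCodeGen emits the isNull checks for both children, so no
     // hand-written eval/codegen plumbing is needed here.
     nullSafeCodeGen(ctx, ev, (arr, elem) => {
       val expr = ctx.addReferenceObj("arraysAppendExpr", this)
       s"${ev.value} = (ArrayData)$expr.nullSafeEval($arr, $elem);"
     })
   }
   ```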



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-08 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1043040982


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {

Review Comment:
   Let me reformat
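
   Something like this guard-style form (sketch; this matches the shape the
   later revision of this PR uses):
   ```
   case (ArrayType(e1, _), e2) if e1.sameType(e2) =>
     TypeCheckResult.TypeCheckSuccess
   case (ArrayType(e1, _), e2) =>
     DataTypeMismatch(
       errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
       messageParameters = Map(
         "functionName" -> toSQLId(prettyName),
         "leftType" -> toSQLType(left.dataType),
         "rightType" -> toSQLType(right.dataType),
         "dataType" -> toSQLType(ArrayType)))
   ```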



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-07 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1042184452


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   Then it would not be consistent with Snowflake's array_append function.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1041824037


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   Nope, the elements inside the src array are not being translated to -1;
only the appended null element is (see the output below).
   
   ```
   
   val df10 = spark.createDataFrame(
 spark.sparkContext.parallelize(
   Seq(Row(Seq[Integer](1, 2, 3, null), null))),
 StructType(List(
   StructField("a", ArrayType.apply(IntegerType), true),
   StructField("b", IntegerType, true)
 ))
   )
   
   df10.selectExpr("array_append(a, b)").printSchema()
   checkAnswer(df10.selectExpr("array_append(a, b)"),
 Seq(Row(Seq(1, 2, 3, null, null)))
   )
   ```
   Output
   ```
   == Results ==
   !== Correct Answer - 1 ==  == Spark Answer - 1 ==
   !struct<>  struct<array_append(a, b):array<int>>
   ![List(1, 2, 3, null, null)]   [ArrayBuffer(1, 2, 3, null, -1)]
   
   ```
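
   For what it's worth, my reading of the root cause (hedged, my own analysis,
   not from the PR): in the generated Java, a nullable int child is an
   (isNull, value) pair, and when isNull is true the value slot holds the
   primitive default (-1 for ints). The hand-rolled doGenCode passes the
   primitive straight into nullSafeEval, so the appended NULL arrives as -1.
   A sketch of one way to respect the null flag (not the PR's final code):
   ```
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val leftGen = left.genCode(ctx)
     val rightGen = right.genCode(ctx)
     val expr = ctx.addReferenceObj("arraysAppendExpr", this)
     val elem = ctx.freshName("elem")
     ev.copy(code =
       code"""
         ${leftGen.code}
         ${rightGen.code}
         boolean ${ev.isNull} = ${leftGen.isNull};
         ArrayData ${ev.value} = null;
         if (!${ev.isNull}) {
           // Box the element only when it is non-null, so nullSafeEval sees a
           // real null instead of the primitive default value (-1 for ints).
           Object $elem = ${rightGen.isNull} ? null : (Object) ${rightGen.value};
           ${ev.value} = (ArrayData) $expr.nullSafeEval(${leftGen.value}, $elem);
         }
       """)
   }
   ```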



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1041666229


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   To give you a sense of the problem:
   ```
   val df9 = spark.createDataFrame(
 spark.sparkContext.parallelize(
   Seq(Row(Array[Int](1, 2, 3), null))),
 StructType(List(
   StructField("a", ArrayType.apply(IntegerType), true),
   StructField("b", IntegerType, true)
 ))
   )
   df9.show
   df9.selectExpr("array_append(a, b)").printSchema()
   checkAnswer(df9.selectExpr("array_append(a, b)"),
 Seq(Row(Seq(1, 2, 3, null)))
   )
   
   ```
   
   Output
   ```
   == Results ==
   !== Correct Answer - 1 ==   == Spark Answer - 1 ==
   !struct<>   struct<array_append(a, b):array<int>>
   ![List(1, 2, 3, null)]  [ArrayBuffer(1, 2, 3, -1)]
   
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1041665895


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   Since it will automatically translate all null elements to default values,
for primitive types at least.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1041664885


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,133 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["b","d","c","a","d"]
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+  with ImplicitCastInputTypes
+  with ComplexTypeMergingExpression
+  with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+  case _ => Seq.empty
+}
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), e2) => if (e1.sameType(e2)) {
+TypeCheckResult.TypeCheckSuccess
+  }
+  else {
+DataTypeMismatch(
+  errorSubClass = "ARRAY_FUNCTION_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> toSQLId(prettyName),
+"leftType" -> toSQLType(left.dataType),
+"rightType" -> toSQLType(right.dataType),
+"dataType" -> toSQLType(ArrayType)
+  ))
+  }
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "UNEXPECTED_INPUT_TYPE",
+  messageParameters = Map(
+"paramIndex" -> "0",
+"requiredType" -> toSQLType(ArrayType),
+"inputSql" -> toSQLExpr(left),
+"inputType" -> toSQLType(left.dataType)
+  )
+)
+}
+  }
+
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): ArrayAppend =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+arrayData.foreach(arrayElementType, finalData.update)
+finalData.update(numberOfElements - 1, elementData)
+new GenericArrayData(finalData)
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+val f = (left: String, right: String) => {
+  val expr = ctx.addReferenceObj("arraysAppendExpr", this)
+  s"${ev.value} = (ArrayData)$expr.nullSafeEval($left, $right);"
+}
+
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)

Review Comment:
   @LuciferYang One more thing I noticed: if we have a null int value, it gets
translated to -1. It is picking up the default value for the Integer primitive
type in the generated Java code. I think we should override this behaviour,
right?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1040953239


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,118 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes with QueryErrorsBase {

Review Comment:
   Got it!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1040942117


##
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala:
##
@@ -5237,6 +5237,59 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
   )
 )
   }
+
+  test("SPARK-41232 array_append -> Unit Test cases for the function ") {
+val df1 = Seq((Array[Int](3, 2, 5, 1, 2), 3)).toDF("a", "b")

Review Comment:
   BTW, I added another test in DataFrameFunctionsSuite, and it promotes the
element to double and then appends it as a double element in the array.
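
   A sketch of what I mean (illustrative values, not the exact test I added):
   ```
   // findTightestCommonType(DoubleType, IntegerType) picks DoubleType, so the
   // int element is widened before the append.
   val df = Seq((Array(1.0, 2.0), 3)).toDF("a", "b")
   df.selectExpr("array_append(a, b)").collect()
   // -> Array(Row(Seq(1.0, 2.0, 3.0)))
   ```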



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1040854537


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,118 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes with QueryErrorsBase {

Review Comment:
   Ahh, got it! Thanks! Let me fix this. Do you use any specific plugin with
your IDE that highlights all these? I added the Checkstyle extension, but it
didn't flag such things. :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1040845039


##
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala:
##
@@ -5237,6 +5237,59 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
   )
 )
   }
+
+  test("SPARK-41232 array_append -> Unit Test cases for the function ") {
+val df1 = Seq((Array[Int](3, 2, 5, 1, 2), 3)).toDF("a", "b")

Review Comment:
   It will throw an exception, since the input types require the element's type
to exactly match the array elements' type. Let me add a unit test for this as
well.
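
   Roughly this shape (hedged sketch, not the exact test I'll add):
   ```
   // No common type exists between array<int> elements and a string, so
   // analysis fails with a DATATYPE_MISMATCH error instead of silently casting.
   val df = Seq((Array(1, 2, 3), "x")).toDF("a", "b")
   intercept[AnalysisException] {
     df.selectExpr("array_append(a, b)").collect()
   }
   ```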



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1040845039


##
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala:
##
@@ -5237,6 +5237,59 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSparkSession {
   )
 )
   }
+
+  test("SPARK-41232 array_append -> Unit Test cases for the function ") {
+val df1 = Seq((Array[Int](3, 2, 5, 1, 2), 3)).toDF("a", "b")

Review Comment:
   It will throw an exception, since in the input types we are trying to match
the element's exact type. Let me add a unit test for this as well.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1040837728


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,118 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes with QueryErrorsBase {
+  override def prettyName: String = "array_append"
+
+  override def inputTypes: Seq[AbstractDataType] = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, hasNull), e2) =>
+TypeCoercion.findTightestCommonType(e1, e2) match {
+  case Some(dt) => Seq(ArrayType(dt, hasNull), dt)
+  case _ => Seq.empty
+}
+}
+Seq.empty
+  }
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+val value2 = right.eval(input)
+if (value1 == null) {
+  null
+} else {
+  nullSafeEval(value1, value2)
+}
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+TypeCheckResult.TypeCheckSuccess
+  case _ =>
+DataTypeMismatch(

Review Comment:
   Makes sense! Let me return UNEXPECTED_INPUT_TYPE when the left value is not
an array, and ARRAY_FUNCTION_DIFF_TYPES when the array elements' type doesn't
match the element to be appended.
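
   For illustration (my own examples of the two branches, error class names as
   per the plan above):
   ```
   // Each of these fails analysis with the corresponding error class:
   spark.sql("SELECT array_append(1, 2)")             // UNEXPECTED_INPUT_TYPE
   spark.sql("SELECT array_append(array(1, 2), 'x')") // ARRAY_FUNCTION_DIFF_TYPES
   ```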



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-06 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1040835864


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,118 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array, element) - Append the element",
+  examples =
+"""
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+
+  """,
+  since = "3.4.0",
+  group = "array_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes with QueryErrorsBase {

Review Comment:
   I didn't understand this one! Can you please elaborate a little? Thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-03 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038747836


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+extends BinaryExpression
+with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+TypeCheckResult.TypeCheckSuccess
+  case _ =>

Review Comment:
   Documenting the behaviour of Snowflake for this function in different
scenarios:
   ```
   select array_append(array_construct(1, 2, 3), 'HELLO');
   -- [ 1, 2, 3, "HELLO" ]
   
   
   select array_append(array_construct(1, 2, 3), NULL);
   -- [1,2,3,undefined]
   
   
   select array_append(NULL, 'a');
   -- null
   
   select array_append(NULL, NULL);
   -- null 
   ```
   So, if the array is null, Snowflake doesn't create an array and returns null
directly, while if the element is null it just inserts it without any null
check.
   
   As of now I think our function is NULL-intolerant: with any null input it
will return null. We need to override this behaviour.
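
   Concretely, the override would look something like this (sketch; same shape
   as the eval override seen elsewhere in this PR):
   ```
   // Only a null array short-circuits to null; a null element still reaches
   // nullSafeEval so it can be appended as-is.
   override def eval(input: InternalRow): Any = {
     val arr = left.eval(input)
     if (arr == null) null else nullSafeEval(arr, right.eval(input))
   }
   ```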



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038725848


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+extends BinaryExpression
+with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+TypeCheckResult.TypeCheckSuccess
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> prettyName,
+"arrayType" -> left.dataType.sql,
+"elementType" -> right.dataType.sql))
+}
+  }
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): Expression =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = left.dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2.asInstanceOf[AnyRef]
+val arrayBuffer = new scala.collection.mutable.ArrayBuffer[Any]
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[AnyRef](numberOfElements.toInt)
+val arr = arrayData.toObjectArray(arrayElementType)

Review Comment:
   Done



##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+extends BinaryExpression
+with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+TypeCheckResult.TypeCheckSuccess
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> prettyName,
+"arrayType" -> left.dataType.sql,
+"elementType" -> right.dataType.sql))
+}
+  }
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): Expression =
+copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+val arrayData = input1.asInstanceOf[ArrayData]
+val arrayElementType = left.dataType.asInstanceOf[ArrayType].elementType
+val elementData = input2.asInstanceOf[AnyRef]
+val arrayBuffer = new scala.collection.mutable.ArrayBuffer[Any]
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[AnyRef](numberOfElements.toInt)

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038725820


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: 
Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
 newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = 
newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the 
array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+extends BinaryExpression
+with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+(left.dataType, right.dataType) match {
+  case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+TypeCheckResult.TypeCheckSuccess
+  case _ =>
+DataTypeMismatch(
+  errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",
+  messageParameters = Map(
+"functionName" -> prettyName,
+"arrayType" -> left.dataType.sql,
+"elementType" -> right.dataType.sql))
+}
+  }
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: 
Expression): Expression =

Review Comment:
   Done



##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>
+        DataTypeMismatch(
+          errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",
+          messageParameters = Map(
+            "functionName" -> prettyName,
+            "arrayType" -> left.dataType.sql,
+            "elementType" -> right.dataType.sql))
+    }
+  }
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression =
+    copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+    val arrayData = input1.asInstanceOf[ArrayData]
+    val arrayElementType = left.dataType.asInstanceOf[ArrayType].elementType
+    val elementData = input2.asInstanceOf[AnyRef]
+    val arrayBuffer = new scala.collection.mutable.ArrayBuffer[Any]

Review Comment:
   Done






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038725804


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")

Review Comment:
   Done



##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)

Review Comment:
   Done






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038725789


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>
+        DataTypeMismatch(
+          errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",

Review Comment:
   Done



##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",

Review Comment:
   Done






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038205079


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)

Review Comment:
   Checked it; it promotes the type to the one that can accommodate both.
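
A quick way to see that promotion (a hypothetical spark-shell check, not part
of the PR; it assumes the findTightestCommonType-based inputTypes above, and
the exact output may differ):

```scala
// findTightestCommonType(DoubleType, IntegerType) is DoubleType, so the
// appended INT literal should be cast to DOUBLE before evaluation.
spark.sql("SELECT array_append(array(1.0D, 2.0D), 3)").show(truncate = false)
// expected: [1.0, 2.0, 3.0]
```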






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038012158


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>
+        DataTypeMismatch(
+          errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",
+          messageParameters = Map(
+            "functionName" -> prettyName,
+            "arrayType" -> left.dataType.sql,
+            "elementType" -> right.dataType.sql))
+    }
+  }
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression =
+    copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+    val arrayData = input1.asInstanceOf[ArrayData]
+    val arrayElementType = left.dataType.asInstanceOf[ArrayType].elementType
+    val elementData = input2.asInstanceOf[AnyRef]
+    val arrayBuffer = new scala.collection.mutable.ArrayBuffer[Any]
+    val numberOfElements = arrayData.numElements() + 1
+    if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+      throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+    }
+    val finalData = new Array[AnyRef](numberOfElements.toInt)
+    val arr = arrayData.toObjectArray(arrayElementType)

Review Comment:
   Yes, `arrayData.toObjectArray(arrayElementType)` is copying the array to
a new array, thanks for pointing it out! Let me fix this quickly.
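
One way to avoid the extra copy is to write elements straight into the result
array with ArrayData.foreach (a sketch only, assuming the surrounding
nullSafeEval variables are in scope; not necessarily the code that landed in
the PR):

```scala
// Fill finalData directly instead of materializing an intermediate
// Object[] via toObjectArray and then copying it over.
val finalData = new Array[AnyRef](numberOfElements.toInt)
arrayData.foreach(arrayElementType, (i, v) => finalData(i) = v.asInstanceOf[AnyRef])
finalData(arrayData.numElements()) = elementData
new GenericArrayData(finalData)
```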






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038002943


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>
+        DataTypeMismatch(
+          errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",
+          messageParameters = Map(
+            "functionName" -> prettyName,
+            "arrayType" -> left.dataType.sql,
+            "elementType" -> right.dataType.sql))
+    }
+  }
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression =

Review Comment:
   Ahh! Yes!






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038001213


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));

Review Comment:
   BTW, this could be achieved using concat, I think.
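
For example (hypothetical snippet, not from the PR):

```scala
// Appending a whole array to another array is already expressible with concat.
spark.sql("SELECT concat(array(1, 2), array(3, 4))").show()
// expected: [1, 2, 3, 4]
```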



##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")

Review Comment:
   Got it! Let me change this as well.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038000808


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",

Review Comment:
   Thanks for pointing it out!



##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));

Review Comment:
   Fixing the example






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1038000294


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>
+        DataTypeMismatch(
+          errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",
+          messageParameters = Map(
+            "functionName" -> prettyName,
+            "arrayType" -> left.dataType.sql,
+            "elementType" -> right.dataType.sql))
+    }
+  }
+  protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression =
+    copy(left = newLeft, right = newRight)
+
+  override protected def nullSafeEval(input1: Any, input2: Any): Any = {
+    val arrayData = input1.asInstanceOf[ArrayData]
+    val arrayElementType = left.dataType.asInstanceOf[ArrayType].elementType
+    val elementData = input2.asInstanceOf[AnyRef]
+    val arrayBuffer = new scala.collection.mutable.ArrayBuffer[Any]

Review Comment:
   Forgot to clean it up; I was using arrayBuffer earlier. Let me remove
this real quick.






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1037984247


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>
+        DataTypeMismatch(
+          errorSubClass = "ARRAY_ELEMENT_DIFF_TYPES",

Review Comment:
   Sure! Let me reuse this errorSubClass :)






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-02 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1037937615


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>

Review Comment:
   I think Snowflake's arrays are not strongly typed, while Spark's arrays
are strongly typed, since Spark does not allow us to mix multiple types of
elements in one array. But yes, if we want to make it similar to Snowflake,
we will have to make Spark arrays loosely typed as well. Let's wait for
everyone else's suggestions. And I think it will be a similar construct for
array_insert and array_prepend as well. :)
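
As an illustration of that strong typing (hypothetical snippet, not from the
PR), building an array out of elements with no common type fails at analysis
time:

```scala
// An INT and an ARRAY<INT> cannot be coerced to a common element type,
// so CreateArray rejects the query during analysis.
spark.sql("SELECT array(1, array(2))")
// expected: AnalysisException with a data type mismatch on the elements
```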






[GitHub] [spark] infoankitp commented on a diff in pull request #38865: [SPARK-41232][SQL][PYTHON] Adding array_append function

2022-12-01 Thread GitBox


infoankitp commented on code in PR #38865:
URL: https://github.com/apache/spark/pull/38865#discussion_r1037736311


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -4600,3 +4600,69 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+/**
+ * Given an array, and another element append the element at the end of the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(expr, expr) - Prepend the element",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), array(1, 2, 3, 4));
+
+  """,
+  since = "3.4.0",
+  group = "collection_funcs")
+case class ArrayAppend(left: Expression, right: Expression)
+    extends BinaryExpression
+    with ImplicitCastInputTypes {
+  override def prettyName: String = "array_append"
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, AnyDataType)
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    (left.dataType, right.dataType) match {
+      case (ArrayType(e1, _), (e2)) if e1.sameType(e2) =>
+        TypeCheckResult.TypeCheckSuccess
+      case _ =>

Review Comment:
   @LuciferYang Thanks for the review! Arrays are more strongly typed in
Spark! In cases where we get elements of a mixed type to append to the
array, should we change the type of the array itself?
   Also, we cannot create a column with Array[AnyDataType] like below; we
will eventually get an error:

   ```
   scala> val df3 = Seq((Array("a", "b", 2, 5d), 3)).toDF("a", "b")
   org.apache.spark.SparkUnsupportedOperationException: No Encoder found for Any
   - array element class: "java.lang.Object"
   - field (class: "scala.Array", name: "_1")
   - root class: "scala.Tuple2"
   ```

   Which is why I thought it's better to analyze the types of the array at
the start itself and raise an error if the types do not match.
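
Under that approach, a non-coercible element is rejected during analysis
rather than at runtime (hypothetical check against the final shape of the
PR; the exact error class may differ):

```scala
// IntegerType and StringType have no tightest common type, so
// checkInputDataTypes should report a data type mismatch.
spark.sql("SELECT array_append(array(1, 2), 'x')")
// expected: AnalysisException (DATATYPE_MISMATCH.ARRAY_FUNCTION_DIFF_TYPES)
```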





