Repository: spark Updated Branches: refs/heads/master 444c2d22e -> 4b2011ec9
[SPARK-17989][SQL] Check ascendingOrder type in sort_array function rather than throwing ClassCastException ## What changes were proposed in this pull request? This PR proposes to check the second argument, `ascendingOrder` rather than throwing `ClassCastException` exception message. ```sql select sort_array(array('b', 'd'), '1'); ``` **Before** ``` 16/10/19 13:16:08 ERROR SparkSQLDriver: Failed in [select sort_array(array('b', 'd'), '1')] java.lang.ClassCastException: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Boolean at scala.runtime.BoxesRunTime.unboxToBoolean(BoxesRunTime.java:85) at org.apache.spark.sql.catalyst.expressions.SortArray.nullSafeEval(collectionOperations.scala:185) at org.apache.spark.sql.catalyst.expressions.BinaryExpression.eval(Expression.scala:416) at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:50) at org.apache.spark.sql.catalyst.optimizer.ConstantFolding$$anonfun$apply$1$$anonfun$applyOrElse$1.applyOrElse(expressions.scala:43) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:292) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:291) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:297) ``` **After** ``` Error in query: cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7; ``` ## How was this patch tested? Unit test in `DataFrameFunctionsSuite`. Author: hyukjinkwon <gurwls...@gmail.com> Closes #15532 from HyukjinKwon/SPARK-17989. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b2011ec Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b2011ec Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b2011ec Branch: refs/heads/master Commit: 4b2011ec9da1245923b5cbd883240fef0dbf3ef0 Parents: 444c2d2 Author: hyukjinkwon <gurwls...@gmail.com> Authored: Wed Oct 19 19:36:21 2016 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Wed Oct 19 19:36:21 2016 -0700 ---------------------------------------------------------------------- .../expressions/collectionOperations.scala | 8 +++++++- .../test/resources/sql-tests/inputs/array.sql | 6 ++++++ .../resources/sql-tests/results/array.sql.out | 21 +++++++++++++++++--- 3 files changed, 31 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index c020029..f56bb39 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -124,7 +124,13 @@ case class SortArray(base: Expression, ascendingOrder: Expression) override def checkInputDataTypes(): TypeCheckResult = base.dataType match { case ArrayType(dt, _) if RowOrdering.isOrderable(dt) => - TypeCheckResult.TypeCheckSuccess + ascendingOrder match { + case Literal(_: Boolean, BooleanType) => + TypeCheckResult.TypeCheckSuccess + case _ => + TypeCheckResult.TypeCheckFailure( + "Sort order in second argument requires a boolean literal.") + } case ArrayType(dt, _) => TypeCheckResult.TypeCheckFailure( s"$prettyName does not support sorting array of type ${dt.simpleString}") http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/core/src/test/resources/sql-tests/inputs/array.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql index 4038a0d..984321a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -71,6 +71,12 @@ select sort_array(timestamp_array) from primitive_arrays; +-- sort_array with an invalid string literal for the argument of sort order. +select sort_array(array('b', 'd'), '1'); + +-- sort_array with an invalid null literal casted as boolean for the argument of sort order. +select sort_array(array('b', 'd'), cast(NULL as boolean)); + -- size select size(boolean_array), http://git-wip-us.apache.org/repos/asf/spark/blob/4b2011ec/sql/core/src/test/resources/sql-tests/results/array.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 4a1d149..499a3d5 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -124,8 +124,23 @@ struct<sort_array(boolean_array, true):array<boolean>,sort_array(tinyint_array, -- !query 8 output [true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0] - -- !query 9 +select sort_array(array('b', 'd'), '1') +-- !query 9 schema +struct<> +-- !query 9 output +org.apache.spark.sql.AnalysisException +cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 + +-- !query 10 +select sort_array(array('b', 'd'), cast(NULL as boolean)) +-- !query 10 schema +struct<> +-- !query 10 output +org.apache.spark.sql.AnalysisException +cannot resolve 'sort_array(array('b', 'd'), CAST(NULL AS BOOLEAN))' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 + +-- !query 11 select size(boolean_array), size(tinyint_array), @@ -138,7 +153,7 @@ select size(date_array), size(timestamp_array) from primitive_arrays --- !query 9 schema +-- !query 11 schema struct<size(boolean_array):int,size(tinyint_array):int,size(smallint_array):int,size(int_array):int,size(bigint_array):int,size(decimal_array):int,size(double_array):int,size(float_array):int,size(date_array):int,size(timestamp_array):int> --- !query 9 output +-- !query 11 output 1 2 2 2 2 2 2 2 2 2 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org