This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new b9371e3 [SPARK-31171][SQL] size(null) should return null under ansi mode b9371e3 is described below commit b9371e38abbc351106453b14172d6919be0eca82 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Tue Mar 17 11:48:54 2020 -0700 [SPARK-31171][SQL] size(null) should return null under ansi mode Make `size(null)` return null under ANSI mode, regardless of the `spark.sql.legacy.sizeOfNull` config. In https://github.com/apache/spark/pull/27834, we changed the result of `size(null)` back to -1 to match the 2.4 behavior and avoid breaking changes. However, it's true that the "return -1" behavior is error-prone when being used with aggregate functions. The current ANSI mode controls a bunch of "better behaviors" like failing on overflow. We don't enable these "better behaviors" by default because they are too breaking. The "return null" behavior of `size(null)` is a good fit for the ANSI mode. User-facing change: no, as ANSI mode is off by default. Tested with new unit tests. Closes #27936 from cloud-fan/null. 
Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> (cherry picked from commit dc5ebc2d5b8122121d89a9175737bea95ae10126) Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 9 ++++++--- .../catalyst/expressions/CollectionExpressionsSuite.scala | 6 ++++++ .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 12 ++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index e49593e..1331350 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1940,8 +1940,8 @@ object SQLConf { val LEGACY_SIZE_OF_NULL = buildConf("spark.sql.legacy.sizeOfNull") .internal() - .doc("If it is set to true, size of null returns -1. This behavior was inherited from Hive. " + - "The size function returns null for null input if the flag is disabled.") + .doc(s"If it is set to false, or ${ANSI_ENABLED.key} is true, then size of null returns " + + "null. Otherwise, it returns -1, which was inherited from Hive.") .booleanConf .createWithDefault(true) @@ -2759,7 +2759,10 @@ class SQLConf extends Serializable with Logging { def csvColumnPruning: Boolean = getConf(SQLConf.CSV_PARSER_COLUMN_PRUNING) - def legacySizeOfNull: Boolean = getConf(SQLConf.LEGACY_SIZE_OF_NULL) + def legacySizeOfNull: Boolean = { + // size(null) should return null under ansi mode. 
+ getConf(SQLConf.LEGACY_SIZE_OF_NULL) && !getConf(ANSI_ENABLED) + } def isReplEagerEvalEnabled: Boolean = getConf(SQLConf.REPL_EAGER_EVAL_ENABLED) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 3cfc66f..173f248 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -74,6 +74,12 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") { testSize(sizeOfNull = null) } + // size(null) should return null under ansi mode. + withSQLConf( + SQLConf.LEGACY_SIZE_OF_NULL.key -> "true", + SQLConf.ANSI_ENABLED.key -> "true") { + testSize(sizeOfNull = null) + } } test("MapKeys/MapValues") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index a613c33..c41eb98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -490,6 +490,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") { testSizeOfArray(sizeOfNull = null) } + // size(null) should return null under ansi mode. 
+ withSQLConf( + SQLConf.LEGACY_SIZE_OF_NULL.key -> "true", + SQLConf.ANSI_ENABLED.key -> "true") { + testSizeOfArray(sizeOfNull = null) + } } test("dataframe arrays_zip function") { @@ -569,6 +575,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") { testSizeOfMap(sizeOfNull = null) } + // size(null) should return null under ansi mode. + withSQLConf( + SQLConf.LEGACY_SIZE_OF_NULL.key -> "true", + SQLConf.ANSI_ENABLED.key -> "true") { + testSizeOfMap(sizeOfNull = null) + } } test("map_keys/map_values function") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org