[spark] branch branch-3.2 updated: [SPARK-36224][SQL] Use Void as the type name of NullType

wenchen Mon, 02 Aug 2021 08:21:23 -0700

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new e26cb96  [SPARK-36224][SQL] Use Void as the type name of NullType
e26cb96 is described below

commit e26cb968bdaff1cce1d5c050226eac1d01e3e947
Author: Linhong Liu <linhong....@databricks.com>
AuthorDate: Mon Aug 2 23:19:54 2021 +0800

    [SPARK-36224][SQL] Use Void as the type name of NullType
    
    ### What changes were proposed in this pull request?
    Change the `NullType.simpleString` to "void" to set "void" as the formal 
type name of `NullType`
    
    ### Why are the changes needed?
    This PR is intended to address the type name discussion in PR #28833. Here 
are the reasons:
    1. The type name of NullType is displayed everywhere, e.g. schema strings, 
error messages, and documentation. Hence it's not possible to hide it from 
users, so we have to choose a proper name
    2. The "void" is widely used as the type name of "NULL", e.g. Hive, pgSQL
    3. Changing to "void" can enable the round trip of `toDDL`/`fromDDL` for 
NullType (i.e. make `from_json(col, schema.toDDL)` work)
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, the type name of "NULL" is changed from "null" to "void". For example:
    ```
    scala> sql("select null as a, 1 as b").schema.catalogString
    res5: String = struct<a:void,b:int>
    ```
    
    ### How was this patch tested?
    existing test cases
    
    Closes #33437 from linhongliu-db/SPARK-36224-void-type-name.
    
    Authored-by: Linhong Liu <linhong....@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 2f700773c2e8fac26661d0aa8024253556a921ba)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 python/pyspark/sql/tests/test_types.py              |  3 +--
 python/pyspark/sql/types.py                         |  4 +++-
 .../scala/org/apache/spark/sql/types/DataType.scala |  2 ++
 .../scala/org/apache/spark/sql/types/NullType.scala |  2 ++
 .../org/apache/spark/sql/types/DataTypeSuite.scala  |  6 ++++++
 .../sql-functions/sql-expression-schema.md          |  6 +++---
 .../sql-tests/results/ansi/literals.sql.out         |  2 +-
 .../sql-tests/results/ansi/string-functions.sql.out |  4 ++--
 .../sql-tests/results/inline-table.sql.out          |  2 +-
 .../resources/sql-tests/results/literals.sql.out    |  2 +-
 .../sql-tests/results/misc-functions.sql.out        |  4 ++--
 .../sql-tests/results/postgreSQL/select.sql.out     |  4 ++--
 .../sql-tests/results/postgreSQL/text.sql.out       |  6 +++---
 .../results/sql-compatibility-functions.sql.out     |  6 +++---
 .../results/table-valued-functions.sql.out          |  2 +-
 .../sql-tests/results/udf/udf-inline-table.sql.out  |  2 +-
 .../apache/spark/sql/FileBasedDataSourceSuite.scala |  2 +-
 .../SparkExecuteStatementOperation.scala            |  1 -
 .../spark/sql/hive/client/HiveClientImpl.scala      | 21 +--------------------
 .../spark/sql/hive/execution/HiveDDLSuite.scala     | 12 ++++++------
 .../spark/sql/hive/orc/HiveOrcSourceSuite.scala     |  2 +-
 21 files changed, 43 insertions(+), 52 deletions(-)

diff --git a/python/pyspark/sql/tests/test_types.py 
b/python/pyspark/sql/tests/test_types.py
index eb4caf0..33fe785 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -496,8 +496,7 @@ class TypesTests(ReusedSQLTestCase):
     def test_parse_datatype_string(self):
         from pyspark.sql.types import _all_atomic_types, _parse_datatype_string
         for k, t in _all_atomic_types.items():
-            if t != NullType:
-                self.assertEqual(t(), _parse_datatype_string(k))
+            self.assertEqual(t(), _parse_datatype_string(k))
         self.assertEqual(IntegerType(), _parse_datatype_string("int"))
         self.assertEqual(DecimalType(1, 1), _parse_datatype_string("decimal(1  
,1)"))
         self.assertEqual(DecimalType(10, 1), _parse_datatype_string("decimal( 
10,1 )"))
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 4b5632b..5e3398b 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -107,7 +107,9 @@ class NullType(DataType, metaclass=DataTypeSingleton):
 
     The data type representing None, used for the types that cannot be 
inferred.
     """
-    pass
+    @classmethod
+    def typeName(cls):
+        return 'void'
 
 
 class AtomicType(DataType):
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index ff6a49a..585045d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -195,6 +195,8 @@ object DataType {
       case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, 
scale.toInt)
       case CHAR_TYPE(length) => CharType(length.toInt)
       case VARCHAR_TYPE(length) => VarcharType(length.toInt)
+      // For backwards compatibility, previously the type name of NullType is 
"null"
+      case "null" => NullType
       case other => otherTypes.getOrElse(
         other,
         throw new IllegalArgumentException(
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
index 14097a5..d211fac 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
@@ -32,6 +32,8 @@ class NullType private() extends DataType {
   override def defaultSize: Int = 1
 
   private[spark] override def asNullable: NullType = this
+
+  override def typeName: String = "void"
 }
 
 /**
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 1c34b50..4ac8281 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -183,6 +183,10 @@ class DataTypeSuite extends SparkFunSuite {
     assert(!arrayType.existsRecursively(_.isInstanceOf[IntegerType]))
   }
 
+  test("SPARK-36224: Backwards compatibility test for NullType.json") {
+    assert(DataType.fromJson("\"null\"") == NullType)
+  }
+
   def checkDataTypeFromJson(dataType: DataType): Unit = {
     test(s"from Json - $dataType") {
       assert(DataType.fromJson(dataType.json) === dataType)
@@ -198,6 +202,7 @@ class DataTypeSuite extends SparkFunSuite {
   }
 
   checkDataTypeFromJson(NullType)
+  checkDataTypeFromDDL(NullType)
 
   checkDataTypeFromJson(BooleanType)
   checkDataTypeFromDDL(BooleanType)
@@ -424,6 +429,7 @@ class DataTypeSuite extends SparkFunSuite {
     i => StructField(s"col$i", IntegerType, nullable = true)
   })
 
+  checkCatalogString(NullType)
   checkCatalogString(BooleanType)
   checkCatalogString(ByteType)
   checkCatalogString(ShortType)
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md 
b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 41692d2..6eafb38 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -34,7 +34,7 @@
 | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT 
ascii('222') | struct<ascii(222):int> |
 | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | 
struct<ASIN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | 
struct<ASINH(0):double> |
-| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT 
assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):null> |
+| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT 
assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):void> |
 | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | 
struct<ATAN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) 
| struct<ATAN2(0, 0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | 
struct<ATANH(0):double> |
@@ -223,7 +223,7 @@
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT 
regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | 
struct<REGEXP(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT 
regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | 
struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT 
rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | 
struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
-| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT 
raise_error('custom error message') | struct<raise_error(custom error 
message):null> |
+| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT 
raise_error('custom error message') | struct<raise_error(custom error 
message):void> |
 | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | 
struct<rand():double> |
 | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | 
struct<rand():double> |
 | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | 
struct<randn():double> |
@@ -366,4 +366,4 @@
 | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT 
xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | 
struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, 
a/b/text()):array<string>> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | 
SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | 
struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | 
SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | 
struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
-| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | 
SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | 
struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
+| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | 
SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | 
struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
\ No newline at end of file
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out
index ab4bc73..ac740bd 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL   NULL    NULL
 
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
index 3f4399f..3f01c8f 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
@@ -74,7 +74,7 @@ select left(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 
requires (string or binary) type, however, 'NULL' is of null type.; line 1 pos 7
+cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 
requires (string or binary) type, however, 'NULL' is of void type.; line 1 pos 7
 
 
 -- !query
@@ -101,7 +101,7 @@ select right(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type 
mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of 
null type.; line 1 pos 7
+cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type 
mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of 
void type.; line 1 pos 7
 
 
 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out 
b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
index 9943b93..12dcf33 100644
--- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
@@ -49,7 +49,7 @@ two   2
 -- !query
 select * from values ("one", null), ("two", null) as data(a, b)
 -- !query schema
-struct<a:string,b:null>
+struct<a:string,b:void>
 -- !query output
 one    NULL
 two    NULL
diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out 
b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
index ab4bc73..ac740bd 100644
--- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL   NULL    NULL
 
diff --git 
a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out
index bb08d07..bf29cc2 100644
--- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -7,7 +7,7 @@ select typeof(null)
 -- !query schema
 struct<typeof(NULL):string>
 -- !query output
-null
+void
 
 
 -- !query
@@ -61,7 +61,7 @@ array<int>    map<int,int>    struct<a:int,b:string>
 -- !query
 SELECT assert_true(true), assert_true(boolean(1))
 -- !query schema
-struct<assert_true(true, 'true' is not true!):null,assert_true(1, 'cast(1 as 
boolean)' is not true!):null>
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as 
boolean)' is not true!):void>
 -- !query output
 NULL   NULL
 
diff --git 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out
index 1e59036..d3674d6 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out
@@ -308,7 +308,7 @@ struct<1:int>
 -- !query
 select foo.* from (select null) as foo
 -- !query schema
-struct<NULL:null>
+struct<NULL:void>
 -- !query output
 NULL
 
@@ -316,7 +316,7 @@ NULL
 -- !query
 select foo.* from (select 'xyzzy',1,null) as foo
 -- !query schema
-struct<xyzzy:string,1:int,NULL:null>
+struct<xyzzy:string,1:int,NULL:void>
 -- !query output
 xyzzy  1       NULL
 
diff --git 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out
index 2387dd2..e1c7a7f 100755
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out
@@ -130,7 +130,7 @@ select concat_ws(',',10,20,null,30)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws(',', 10, 20, NULL, 30)' due to data type mismatch: 
argument 2 requires (array<string> or string) type, however, '10' is of int 
type. argument 3 requires (array<string> or string) type, however, '20' is of 
int type. argument 4 requires (array<string> or string) type, however, 'NULL' 
is of null type. argument 5 requires (array<string> or string) type, however, 
'30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws(',', 10, 20, NULL, 30)' due to data type mismatch: 
argument 2 requires (array<string> or string) type, however, '10' is of int 
type. argument 3 requires (array<string> or string) type, however, '20' is of 
int type. argument 4 requires (array<string> or string) type, however, 'NULL' 
is of void type. argument 5 requires (array<string> or string) type, however, 
'30' is of int type.; line 1 pos 7
 
 
 -- !query
@@ -139,7 +139,7 @@ select concat_ws('',10,20,null,30)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws('', 10, 20, NULL, 30)' due to data type mismatch: 
argument 2 requires (array<string> or string) type, however, '10' is of int 
type. argument 3 requires (array<string> or string) type, however, '20' is of 
int type. argument 4 requires (array<string> or string) type, however, 'NULL' 
is of null type. argument 5 requires (array<string> or string) type, however, 
'30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws('', 10, 20, NULL, 30)' due to data type mismatch: 
argument 2 requires (array<string> or string) type, however, '10' is of int 
type. argument 3 requires (array<string> or string) type, however, '20' is of 
int type. argument 4 requires (array<string> or string) type, however, 'NULL' 
is of void type. argument 5 requires (array<string> or string) type, however, 
'30' is of int type.; line 1 pos 7
 
 
 -- !query
@@ -148,7 +148,7 @@ select concat_ws(NULL,10,20,null,30) is null
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'concat_ws(CAST(NULL AS STRING), 10, 20, NULL, 30)' due to data 
type mismatch: argument 2 requires (array<string> or string) type, however, 
'10' is of int type. argument 3 requires (array<string> or string) type, 
however, '20' is of int type. argument 4 requires (array<string> or string) 
type, however, 'NULL' is of null type. argument 5 requires (array<string> or 
string) type, however, '30' is of int type.; line 1 pos 7
+cannot resolve 'concat_ws(CAST(NULL AS STRING), 10, 20, NULL, 30)' due to data 
type mismatch: argument 2 requires (array<string> or string) type, however, 
'10' is of int type. argument 3 requires (array<string> or string) type, 
however, '20' is of int type. argument 4 requires (array<string> or string) 
type, however, 'NULL' is of void type. argument 5 requires (array<string> or 
string) type, however, '30' is of int type.; line 1 pos 7
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
index 3549f2c..065424d 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out
@@ -5,7 +5,7 @@
 -- !query
 SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)
 -- !query schema
-struct<ifnull(NULL, x):string,ifnull(y, x):string,ifnull(NULL, NULL):null>
+struct<ifnull(NULL, x):string,ifnull(y, x):string,ifnull(NULL, NULL):void>
 -- !query output
 x      y       NULL
 
@@ -21,7 +21,7 @@ NULL  x
 -- !query
 SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)
 -- !query schema
-struct<nvl(NULL, x):string,nvl(y, x):string,nvl(NULL, NULL):null>
+struct<nvl(NULL, x):string,nvl(y, x):string,nvl(NULL, NULL):void>
 -- !query output
 x      y       NULL
 
@@ -29,7 +29,7 @@ x     y       NULL
 -- !query
 SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)
 -- !query schema
-struct<nvl2(NULL, x, y):string,nvl2(n, x, y):string,nvl2(NULL, NULL, 
NULL):null>
+struct<nvl2(NULL, x, y):string,nvl2(n, x, y):string,nvl2(NULL, NULL, 
NULL):void>
 -- !query output
 y      x       NULL
 
diff --git 
a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
index 157b2ce..cd85308 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out
@@ -89,7 +89,7 @@ Table-valued function range with alternatives:
     range(start: long, end: long, step: long)
     range(start: long, end: long)
     range(end: long)
-cannot be applied to (integer, null): Incompatible input data type. Expected: 
long; Found: null; line 1 pos 14
+cannot be applied to (integer, void): Incompatible input data type. Expected: 
long; Found: void; line 1 pos 14
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
index 78e9190..2872f1b 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
@@ -49,7 +49,7 @@ two   2
 -- !query
 select udf(a), b from values ("one", null), ("two", null) as data(a, b)
 -- !query schema
-struct<udf(a):string,b:null>
+struct<udf(a):string,b:void>
 -- !query output
 one    NULL
 two    NULL
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index 6452e67..c71f667 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -421,7 +421,7 @@ class FileBasedDataSourceSuite extends QueryTest
         ""
       }
       def errorMessage(format: String): String = {
-        s"$format data source does not support null data type."
+        s"$format data source does not support void data type."
       }
       withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> useV1List) {
         withTempDir { dir =>
diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index cdee339..bfff5d7 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -375,7 +375,6 @@ object SparkExecuteStatementOperation {
   def getTableSchema(structType: StructType): TableSchema = {
     val schema = structType.map { field =>
       val attrTypeString = field.dataType match {
-        case NullType => "void"
         case CalendarIntervalType => StringType.catalogString
         case _: YearMonthIntervalType => "interval_year_month"
         case _: DayTimeIntervalType => "interval_day_time"
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 87cd852..b1c83af 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1000,7 +1000,7 @@ private[hive] object HiveClientImpl extends Logging {
     // When reading data in parquet, orc, or avro file format with string type 
for char,
     // the tailing spaces may lost if we are not going to pad it.
     val typeString = CharVarcharUtils.getRawTypeString(c.metadata)
-      .getOrElse(HiveVoidType.replaceVoidType(c.dataType).catalogString)
+      .getOrElse(c.dataType.catalogString)
     new FieldSchema(c.name, typeString, c.getComment().orNull)
   }
 
@@ -1278,22 +1278,3 @@ private[hive] object HiveClientImpl extends Logging {
     hiveConf
   }
 }
-
-private[hive] case object HiveVoidType extends DataType {
-  override def defaultSize: Int = 1
-  override def asNullable: DataType = HiveVoidType
-  override def simpleString: String = "void"
-
-  def replaceVoidType(dt: DataType): DataType = dt match {
-    case ArrayType(et, nullable) =>
-      ArrayType(replaceVoidType(et), nullable)
-    case MapType(kt, vt, nullable) =>
-      MapType(replaceVoidType(kt), replaceVoidType(vt), nullable)
-    case StructType(fields) =>
-      StructType(fields.map { field =>
-        field.copy(dataType = replaceVoidType(field.dataType))
-      })
-    case _: NullType => HiveVoidType
-    case _ => dt
-  }
-}
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 7f42b3c..6d8938b 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2393,12 +2393,12 @@ class HiveDDLSuite
     }
   }
 
-  test("SPARK-36241: support creating tables with null datatype") {
-    // CTAS with null type
+  test("SPARK-36241: support creating tables with void datatype") {
+    // CTAS with void type
     withTable("t1", "t2", "t3") {
       assertAnalysisError(
         "CREATE TABLE t1 USING PARQUET AS SELECT NULL AS null_col",
-        "Parquet data source does not support null data type")
+        "Parquet data source does not support void data type")
 
       assertAnalysisError(
         "CREATE TABLE t2 STORED AS PARQUET AS SELECT null as null_col",
@@ -2408,11 +2408,11 @@ class HiveDDLSuite
       checkAnswer(sql("SELECT * FROM t3"), Row(null))
     }
 
-    // Create table with null type
+    // Create table with void type
     withTable("t1", "t2", "t3", "t4") {
       assertAnalysisError(
         "CREATE TABLE t1 (v VOID) USING PARQUET",
-        "Parquet data source does not support null data type")
+        "Parquet data source does not support void data type")
 
       assertAnalysisError(
         "CREATE TABLE t2 (v VOID) STORED AS PARQUET",
@@ -2425,7 +2425,7 @@ class HiveDDLSuite
       checkAnswer(sql("SELECT * FROM t4"), Seq.empty)
     }
 
-    // Create table with null type using spark.catalog.createTable
+    // Create table with void type using spark.catalog.createTable
     withTable("t") {
       val schema = new StructType().add("c", NullType)
       spark.catalog.createTable(
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index e94e0b3..a66c337 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -121,7 +121,7 @@ class HiveOrcSourceSuite extends OrcSuite with 
TestHiveSingleton {
       msg = intercept[AnalysisException] {
         sql("select null").write.mode("overwrite").orc(orcDir)
       }.getMessage
-      assert(msg.contains("ORC data source does not support null data type."))
+      assert(msg.contains("ORC data source does not support void data type."))
 
       msg = intercept[AnalysisException] {
         spark.udf.register("testType", () => new IntervalData())

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch branch-3.2 updated: [SPARK-36224][SQL] Use Void as the type name of NullType

Reply via email to