[spark] branch branch-3.2 updated: [SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE

wenchen Thu, 08 Jul 2021 10:23:31 -0700

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 5ec2ddf  [SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE
5ec2ddf is described below

commit 5ec2ddff6a2a468b1925422ec74ed710f479d5fd
Author: PengLei <peng.8...@gmail.com>
AuthorDate: Fri Jul 9 01:21:38 2021 +0800

    [SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE
    
    ### What changes were proposed in this pull request?
    When executing the `SHOW CREATE TABLE` command, we should not lose the
    nullability flag of a table column that is specified as `NOT NULL`.
    
    ### Why are the changes needed?
    [SPARK-36012](https://issues.apache.org/jira/browse/SPARK-36012)
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added a unit test for V1 and updated the existing unit test for V2.
    
    Closes #33219 from Peng-Lei/SPARK-36012.
    
    Authored-by: PengLei <peng.8...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit e071721a51e50ea836971fa7a21ccb9556a99c29)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/sql/types/StructField.scala   |  9 ++-
 .../spark/sql/DataFrameSetOperationsSuite.scala    | 89 +++++++++++++++++-----
 .../apache/spark/sql/ShowCreateTableSuite.scala    | 27 +++++--
 .../spark/sql/connector/DataSourceV2SQLSuite.scala |  4 +-
 .../spark/sql/hive/HiveShowCreateTableSuite.scala  |  3 +-
 5 files changed, 98 insertions(+), 34 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
index f4c7370..93d57a7f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala
@@ -97,9 +97,12 @@ case class StructField(
 
   /**
    * Returns a string containing a schema in DDL format. For example, the following value:
-   * `StructField("eventId", IntegerType)` will be converted to `eventId` INT.
-   *
+   * `StructField("eventId", IntegerType, false)` will be converted to `eventId` INT NOT NULL.
+   * `StructField("eventId", IntegerType, true)` will be converted to `eventId` INT.
    * @since 2.4.0
    */
-  def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment"
+  def toDDL: String = {
+    val nullString = if (nullable) "" else " NOT NULL"
+    s"${quoteIdentifier(name)} ${dataType.sql}${nullString}$getDDLComment"
+  }
 }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
index fcd3e83..e3259a2 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
@@ -654,7 +654,13 @@ class DataFrameSetOperationsSuite extends QueryTest with 
SharedSparkSession {
         Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, 
null), 2) :: Nil
     )
 
-    assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: 
INT>,`idx` INT")
+    var schema = new StructType()
+      .add("a", new StructType()
+        .add("_1", IntegerType, true)
+        .add("_2", IntegerType, true)
+        .add("_3", IntegerType, true), true)
+      .add("idx", IntegerType, false)
+    assert(unionDf.schema == schema)
 
     unionDf = df1.unionByName(df2, true).unionByName(df3, true)
 
@@ -669,8 +675,14 @@ class DataFrameSetOperationsSuite extends QueryTest with 
SharedSparkSession {
         Row(Row(110, 111, 112, 113), 1) ::
         Row(Row(120, 121, 122, 123), 2) :: Nil // df3
     )
-    assert(unionDf.schema.toDDL ==
-      "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT")
+    schema = new StructType()
+      .add("a", new StructType()
+        .add("_1", IntegerType, true)
+        .add("_2", IntegerType, true)
+        .add("_3", IntegerType, true)
+        .add("_4", IntegerType, true), true)
+      .add("idx", IntegerType, false)
+    assert(unionDf.schema == schema)
   }
 
   test("SPARK-32376: Make unionByName null-filling behavior work with struct 
columns - nested") {
@@ -678,26 +690,38 @@ class DataFrameSetOperationsSuite extends QueryTest with 
SharedSparkSession {
     val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a")
 
     var unionDf = df1.unionByName(df2, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
+    val schema1 = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("c", StringType, true)
+          .add("b", LongType, true), true), true)
+    assert(unionDf.schema == schema1)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null))) ::
         Row(1, Row(1, 2, Row(2, null, 3L))) :: Nil)
 
     unionDf = df2.unionByName(df1, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>")
+    val schema2 = new StructType()
+      .add("id", IntegerType, false)
+      .add("a", new StructType()
+        .add("a", IntegerType, true)
+        .add("b", LongType, true)
+        .add("nested", new StructType()
+          .add("a", IntegerType, true)
+          .add("b", LongType, true)
+          .add("c", StringType, true), true), true)
+    assert(unionDf.schema== schema2)
     checkAnswer(unionDf,
       Row(1, Row(1, 2, Row(2, 3L, null))) ::
         Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil)
 
     val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a")
     unionDf = df1.unionByName(df3, true)
-    assert(unionDf.schema.toDDL ==
-      "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-        "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>")
+    assert(unionDf.schema == schema1)
     checkAnswer(unionDf,
       Row(0, Row(0, 1, Row(1, "2", null))) ::
         Row(2, Row(2, 3, null)) :: Nil)
@@ -710,26 +734,49 @@ class DataFrameSetOperationsSuite extends QueryTest with 
SharedSparkSession {
       val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", 
"a")
 
       var unionDf = df1.unionByName(df2, true)
-      assert(unionDf.schema.toDDL ==
-        "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-          "`nested`: STRUCT<`a`: INT, `c`: STRING, `A`: INT, `b`: BIGINT>>")
+      var schema = new StructType()
+        .add("id", IntegerType, false)
+        .add("a", new StructType()
+          .add("a", IntegerType, true)
+          .add("b", LongType, true)
+          .add("nested", new StructType()
+            .add("a", IntegerType, true)
+            .add("c", StringType, true)
+            .add("A", IntegerType, true)
+            .add("b", LongType, true), true), true)
+      assert(unionDf.schema == schema)
       checkAnswer(unionDf,
         Row(0, Row(0, 1, Row(1, "2", null, null))) ::
           Row(1, Row(1, 2, Row(null, null, 2, 3L))) :: Nil)
 
       unionDf = df2.unionByName(df1, true)
-      assert(unionDf.schema.toDDL ==
-        "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-          "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT, `c`: STRING>>")
+      schema = new StructType()
+        .add("id", IntegerType, false)
+        .add("a", new StructType()
+          .add("a", IntegerType, true)
+          .add("b", LongType, true)
+          .add("nested", new StructType()
+            .add("A", IntegerType, true)
+            .add("b", LongType, true)
+            .add("a", IntegerType, true)
+            .add("c", StringType, true), true), true)
+      assert(unionDf.schema == schema)
       checkAnswer(unionDf,
         Row(1, Row(1, 2, Row(2, 3L, null, null))) ::
           Row(0, Row(0, 1, Row(null, null, 1, "2"))) :: Nil)
 
       val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", 
"a")
       unionDf = df2.unionByName(df3, true)
-      assert(unionDf.schema.toDDL ==
-        "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " +
-          "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT>>")
+      schema = new StructType()
+        .add("id", IntegerType, false)
+        .add("a", new StructType()
+          .add("a", IntegerType, true)
+          .add("b", LongType, true)
+          .add("nested", new StructType()
+            .add("A", IntegerType, true)
+            .add("b", LongType, true)
+            .add("a", IntegerType, true), true), true)
+      assert(unionDf.schema == schema)
       checkAnswer(unionDf,
         Row(1, Row(1, 2, Row(2, 3L, null))) ::
           Row(2, Row(2, 3, Row(null, 5L, 4))) :: Nil)
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
index 5ce5d36..6839294 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql
 
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.sources.SimpleInsertSource
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
 import org.apache.spark.util.Utils
 
@@ -176,19 +177,31 @@ abstract class ShowCreateTableSuite extends QueryTest 
with SQLTestUtils {
       val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)"
       sql(s"$createTable USING json")
       val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
-      assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: 
STRING>)")
+      assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: 
STRING>) USING json")
 
       checkCreateTable("t1")
     }
   }
 
+  test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") {
+    val t = "SPARK_36012"
+    withTable(t) {
+      sql(
+        s"""
+           |CREATE TABLE $t (
+           |  a bigint NOT NULL,
+           |  b bigint
+           |)
+           |USING ${classOf[SimpleInsertSource].getName}
+        """.stripMargin)
+      val showDDL = getShowDDL(s"SHOW CREATE TABLE $t")
+      assert(showDDL == s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," +
+        s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}")
+    }
+  }
+
   protected def getShowDDL(showCreateTableSql: String): String = {
-    val result = sql(showCreateTableSql)
-      .head()
-      .getString(0)
-      .split("\n")
-      .map(_.trim)
-    if (result.length > 1) result(0) + result(1) else result.head
+    sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ")
   }
 
   protected def checkCreateTable(table: String, serde: Boolean = false): Unit 
= {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index bdf198b..4f1f4c2 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -1978,7 +1978,7 @@ class DataSourceV2SQLSuite
       sql(
         s"""
            |CREATE TABLE $t (
-           |  a bigint,
+           |  a bigint NOT NULL,
            |  b bigint,
            |  c bigint,
            |  `extra col` ARRAY<INT>,
@@ -1996,7 +1996,7 @@ class DataSourceV2SQLSuite
       val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t")
       assert(showDDL === Array(
         "CREATE TABLE testcat.ns1.ns2.tbl (",
-        "`a` BIGINT,",
+        "`a` BIGINT NOT NULL,",
         "`b` BIGINT,",
         "`c` BIGINT,",
         "`extra col` ARRAY<INT>,",
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
index 2fb67c7..e3a1034 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala
@@ -247,7 +247,8 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite 
with TestHiveSinglet
       val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING 
hive"
       sql(createTable)
       val shownDDL = getShowDDL("SHOW CREATE TABLE t1")
-      assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: 
STRING>)")
+      assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) ==
+        "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)")
 
       checkCreateTable("t1", serde = true)
     }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch branch-3.2 updated: [SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE

Reply via email to