This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new 5ec2ddf [SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE 5ec2ddf is described below commit 5ec2ddff6a2a468b1925422ec74ed710f479d5fd Author: PengLei <peng.8...@gmail.com> AuthorDate: Fri Jul 9 01:21:38 2021 +0800 [SPARK-36012][SQL] Add null flag in SHOW CREATE TABLE ### What changes were proposed in this pull request? When executing the `SHOW CREATE TABLE` command, we should not lose the null flag of a table column that is specified as `NOT NULL`. ### Why are the changes needed? [SPARK-36012](https://issues.apache.org/jira/browse/SPARK-36012) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a unit test for V1 and updated the existing unit tests for V2. Closes #33219 from Peng-Lei/SPARK-36012. Authored-by: PengLei <peng.8...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit e071721a51e50ea836971fa7a21ccb9556a99c29) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/types/StructField.scala | 9 ++- .../spark/sql/DataFrameSetOperationsSuite.scala | 89 +++++++++++++++++----- .../apache/spark/sql/ShowCreateTableSuite.scala | 27 +++++-- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 4 +- .../spark/sql/hive/HiveShowCreateTableSuite.scala | 3 +- 5 files changed, 98 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index f4c7370..93d57a7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -97,9 +97,12 @@ case class StructField( /** * Returns a string containing a schema in DDL format. For example, the following value: - * `StructField("eventId", IntegerType)` will be converted to `eventId` INT. 
- * + * `StructField("eventId", IntegerType, false)` will be converted to `eventId` INT NOT NULL. + * `StructField("eventId", IntegerType, true)` will be converted to `eventId` INT. * @since 2.4.0 */ - def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment" + def toDDL: String = { + val nullString = if (nullable) "" else " NOT NULL" + s"${quoteIdentifier(name)} ${dataType.sql}${nullString}$getDDLComment" + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index fcd3e83..e3259a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -654,7 +654,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil ) - assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT") + var schema = new StructType() + .add("a", new StructType() + .add("_1", IntegerType, true) + .add("_2", IntegerType, true) + .add("_3", IntegerType, true), true) + .add("idx", IntegerType, false) + assert(unionDf.schema == schema) unionDf = df1.unionByName(df2, true).unionByName(df3, true) @@ -669,8 +675,14 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { Row(Row(110, 111, 112, 113), 1) :: Row(Row(120, 121, 122, 123), 2) :: Nil // df3 ) - assert(unionDf.schema.toDDL == - "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT") + schema = new StructType() + .add("a", new StructType() + .add("_1", IntegerType, true) + .add("_2", IntegerType, true) + .add("_3", IntegerType, true) + .add("_4", IntegerType, true), true) + .add("idx", IntegerType, false) + assert(unionDf.schema == schema) } test("SPARK-32376: Make unionByName null-filling behavior work with 
struct columns - nested") { @@ -678,26 +690,38 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a") var unionDf = df1.unionByName(df2, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>") + val schema1 = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("a", IntegerType, true) + .add("c", StringType, true) + .add("b", LongType, true), true), true) + assert(unionDf.schema == schema1) checkAnswer(unionDf, Row(0, Row(0, 1, Row(1, "2", null))) :: Row(1, Row(1, 2, Row(2, null, 3L))) :: Nil) unionDf = df2.unionByName(df1, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>") + val schema2 = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("c", StringType, true), true), true) + assert(unionDf.schema== schema2) checkAnswer(unionDf, Row(1, Row(1, 2, Row(2, 3L, null))) :: Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil) val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a") unionDf = df1.unionByName(df3, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `c`: STRING, `b`: BIGINT>>") + assert(unionDf.schema == schema1) checkAnswer(unionDf, Row(0, Row(0, 1, Row(1, "2", null))) :: Row(2, Row(2, 3, null)) :: Nil) @@ -710,26 +734,49 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a") var unionDf = 
df1.unionByName(df2, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`a`: INT, `c`: STRING, `A`: INT, `b`: BIGINT>>") + var schema = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("a", IntegerType, true) + .add("c", StringType, true) + .add("A", IntegerType, true) + .add("b", LongType, true), true), true) + assert(unionDf.schema == schema) checkAnswer(unionDf, Row(0, Row(0, 1, Row(1, "2", null, null))) :: Row(1, Row(1, 2, Row(null, null, 2, 3L))) :: Nil) unionDf = df2.unionByName(df1, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT, `c`: STRING>>") + schema = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("A", IntegerType, true) + .add("b", LongType, true) + .add("a", IntegerType, true) + .add("c", StringType, true), true), true) + assert(unionDf.schema == schema) checkAnswer(unionDf, Row(1, Row(1, 2, Row(2, 3L, null, null))) :: Row(0, Row(0, 1, Row(null, null, 1, "2"))) :: Nil) val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a") unionDf = df2.unionByName(df3, true) - assert(unionDf.schema.toDDL == - "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + - "`nested`: STRUCT<`A`: INT, `b`: BIGINT, `a`: INT>>") + schema = new StructType() + .add("id", IntegerType, false) + .add("a", new StructType() + .add("a", IntegerType, true) + .add("b", LongType, true) + .add("nested", new StructType() + .add("A", IntegerType, true) + .add("b", LongType, true) + .add("a", IntegerType, true), true), true) + assert(unionDf.schema == schema) checkAnswer(unionDf, Row(1, Row(1, 2, Row(2, 3L, null))) :: Row(2, Row(2, 3, Row(null, 5L, 4))) :: Nil) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index 5ce5d36..6839294 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.sources.SimpleInsertSource import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} import org.apache.spark.util.Utils @@ -176,19 +177,31 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)" sql(s"$createTable USING json") val shownDDL = getShowDDL("SHOW CREATE TABLE t1") - assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)") + assert(shownDDL == "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>) USING json") checkCreateTable("t1") } } + test("SPARK-36012: Add NULL flag when SHOW CREATE TABLE") { + val t = "SPARK_36012" + withTable(t) { + sql( + s""" + |CREATE TABLE $t ( + | a bigint NOT NULL, + | b bigint + |) + |USING ${classOf[SimpleInsertSource].getName} + """.stripMargin) + val showDDL = getShowDDL(s"SHOW CREATE TABLE $t") + assert(showDDL == s"CREATE TABLE `default`.`$t` ( `a` BIGINT NOT NULL," + + s" `b` BIGINT) USING ${classOf[SimpleInsertSource].getName}") + } + } + protected def getShowDDL(showCreateTableSql: String): String = { - val result = sql(showCreateTableSql) - .head() - .getString(0) - .split("\n") - .map(_.trim) - if (result.length > 1) result(0) + result(1) else result.head + sql(showCreateTableSql).head().getString(0).split("\n").map(_.trim).mkString(" ") } protected def checkCreateTable(table: String, serde: Boolean = false): Unit = { diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index bdf198b..4f1f4c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1978,7 +1978,7 @@ class DataSourceV2SQLSuite sql( s""" |CREATE TABLE $t ( - | a bigint, + | a bigint NOT NULL, | b bigint, | c bigint, | `extra col` ARRAY<INT>, @@ -1996,7 +1996,7 @@ class DataSourceV2SQLSuite val showDDL = getShowCreateDDL(s"SHOW CREATE TABLE $t") assert(showDDL === Array( "CREATE TABLE testcat.ns1.ns2.tbl (", - "`a` BIGINT,", + "`a` BIGINT NOT NULL,", "`b` BIGINT,", "`c` BIGINT,", "`extra col` ARRAY<INT>,", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 2fb67c7..e3a1034 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -247,7 +247,8 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive" sql(createTable) val shownDDL = getShowDDL("SHOW CREATE TABLE t1") - assert(shownDDL == "CREATE TABLE `default`.`t1` (`a` STRUCT<`b`: STRING>)") + assert(shownDDL.substring(0, shownDDL.indexOf(" USING")) == + "CREATE TABLE `default`.`t1` ( `a` STRUCT<`b`: STRING>)") checkCreateTable("t1", serde = true) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org