Repository: spark Updated Branches: refs/heads/branch-2.0 a7f5e7066 -> 3500dbc9b
[SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables Currently there are 2 inconsistence: 1. for data source table, we only print partition names, for hive table, we also print partition schema. After this PR, we will always print schema 2. if column doesn't have comment, data source table will print empty string, hive table will print null. After this PR, we will always print null new test in `HiveDDLSuite` Author: Wenchen Fan <wenc...@databricks.com> Closes #14302 from cloud-fan/minor3. (cherry picked from commit a2abb583caaec9a2cecd5d65b05d172fc096c125) Signed-off-by: Wenchen Fan <wenc...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3500dbc9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3500dbc9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3500dbc9 Branch: refs/heads/branch-2.0 Commit: 3500dbc9bcce243b6656f308ee4941de0350d198 Parents: a7f5e70 Author: Wenchen Fan <wenc...@databricks.com> Authored: Tue Jul 26 18:46:12 2016 +0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Sun Sep 4 00:15:57 2016 +0800 ---------------------------------------------------------------------- .../spark/sql/execution/command/tables.scala | 11 +++---- .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++++++++++---------- .../sql/hive/MetastoreDataSourcesSuite.scala | 2 +- .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++++++++++++++----- .../sql/hive/execution/HiveQuerySuite.scala | 4 +-- 5 files changed, 47 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index a5ccbcf..7e6a352 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -436,11 +436,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = { if (DDLUtils.isDatasourceTable(table)) { - val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table) - if (partCols.nonEmpty) { + val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table) + val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table) + for (schema <- userSpecifiedSchema if partColNames.nonEmpty) { append(buffer, "# Partition Information", "", "") - append(buffer, s"# ${output.head.name}", "", "") - partCols.foreach(col => append(buffer, col, "", "")) + append(buffer, s"# ${output.head.name}", output(1).name, output(2).name) + describeSchema(StructType(partColNames.map(schema(_))), buffer) } } else { if (table.partitionColumns.nonEmpty) { @@ -527,7 +528,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = { schema.foreach { column => val comment = - if (column.metadata.contains("comment")) column.metadata.getString("comment") else "" + if (column.metadata.contains("comment")) column.metadata.getString("comment") else null append(buffer, column.name, column.dataType.simpleString, comment) } } http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index 5a7a907..c2aedff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -98,21 +98,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext { "describe ddlPeople", Seq( Row("intType", "int", "test comment test1"), - Row("stringType", "string", ""), - Row("dateType", "date", ""), - Row("timestampType", "timestamp", ""), - Row("doubleType", "double", ""), - Row("bigintType", "bigint", ""), - Row("tinyintType", "tinyint", ""), - Row("decimalType", "decimal(10,0)", ""), - Row("fixedDecimalType", "decimal(5,1)", ""), - Row("binaryType", "binary", ""), - Row("booleanType", "boolean", ""), - Row("smallIntType", "smallint", ""), - Row("floatType", "float", ""), - Row("mapType", "map<string,string>", ""), - Row("arrayType", "array<string>", ""), - Row("structType", "struct<f1:string,f2:int>", "") + Row("stringType", "string", null), + Row("dateType", "date", null), + Row("timestampType", "timestamp", null), + Row("doubleType", "double", null), + Row("bigintType", "bigint", null), + Row("tinyintType", "tinyint", null), + Row("decimalType", "decimal(10,0)", null), + Row("fixedDecimalType", "decimal(5,1)", null), + Row("binaryType", "binary", null), + Row("booleanType", "boolean", null), + Row("smallIntType", "smallint", null), + Row("floatType", "float", null), + Row("mapType", "map<string,string>", null), + Row("arrayType", "array<string>", null), + Row("structType", "struct<f1:string,f2:int>", null) )) test("SPARK-7686 DescribeCommand should have correct physical plan output attributes") { http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index af071f9..a0b3b37 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -748,7 +748,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv assert(schema === actualSchema) // Checks the DESCRIBE output. - checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", "") :: Nil) + checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", null) :: Nil) } } http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 1416409..3cf3c6a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -472,6 +472,22 @@ class HiveDDLSuite } } + test("desc table for Hive table - partitioned table") { + withTable("tbl") { + sql("CREATE TABLE tbl(a int) PARTITIONED BY (b int)") + + assert(sql("DESC tbl").collect().containsSlice( + Seq( + Row("a", "int", null), + Row("b", "int", null), + Row("# Partition Information", "", ""), + Row("# col_name", "data_type", "comment"), + Row("b", "int", null) + ) + )) + } + } + test("desc table for data source table using Hive Metastore") { assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive") val tabName = "tab1" @@ -677,7 +693,7 @@ class HiveDDLSuite val desc = sql("DESC FORMATTED t1").collect().toSeq - assert(desc.contains(Row("id", "bigint", ""))) + assert(desc.contains(Row("id", "bigint", null))) } } } @@ -694,13 +710,13 @@ class HiveDDLSuite assert(formattedDesc.containsSlice( Seq( - Row("a", "bigint", ""), - Row("b", "bigint", ""), - Row("c", "bigint", ""), - Row("d", "bigint", ""), + Row("a", "bigint", null), + Row("b", "bigint", null), + Row("c", "bigint", null), + Row("d", "bigint", null), Row("# Partition Information", "", ""), - Row("# col_name", "", ""), - Row("d", "", ""), + Row("# col_name", "data_type", "comment"), + Row("d", "bigint", null), Row("", "", ""), Row("# Detailed Table Information", "", ""), Row("Database:", "default", "") http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index db6c64a..55d7905 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -825,8 +825,8 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { assertResult( Array( - Row("a", "int", ""), - Row("b", "string", "")) + Row("a", "int", null), + Row("b", "string", null)) ) { sql("DESCRIBE test_describe_commands2") .select('col_name, 'data_type, 'comment) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org