spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables

2016-09-03 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 a7f5e7066 -> 3500dbc9b


[SPARK-16663][SQL] desc table should be consistent between data source and hive 
serde tables

Currently there are two inconsistencies:

1. For a data source table we only print the partition column names, while for a hive serde table we also print the partition schema. After this PR we always print the full schema, as illustrated below.
2. If a column has no comment, a data source table prints an empty string, while a hive serde table prints null. After this PR we always print null.
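
For illustration only, a hedged sketch (the table `t`, its columns, and the spark-shell session below are invented for this description, not taken from the patch) of what DESC on a partitioned data source table is expected to look like after this change:

// Run from the spark-shell, where spark.implicits._ is already in scope.
Seq((1, "x")).toDF("a", "b").write.partitionBy("b").saveAsTable("t")
sql("DESC t").collect().foreach(println)
// Expected shape of the output after this patch:
// [a,int,null]                      <- a missing comment prints as null, not ""
// [b,string,null]
// [# Partition Information,,]
// [# col_name,data_type,comment]    <- the header row now fills all three columns
// [b,string,null]                   <- the partition column's full schema, not just its name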

new test in `HiveDDLSuite`

Author: Wenchen Fan 

Closes #14302 from cloud-fan/minor3.

(cherry picked from commit a2abb583caaec9a2cecd5d65b05d172fc096c125)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3500dbc9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3500dbc9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3500dbc9

Branch: refs/heads/branch-2.0
Commit: 3500dbc9bcce243b6656f308ee4941de0350d198
Parents: a7f5e70
Author: Wenchen Fan 
Authored: Tue Jul 26 18:46:12 2016 +0800
Committer: Wenchen Fan 
Committed: Sun Sep 4 00:15:57 2016 +0800

--
 .../spark/sql/execution/command/tables.scala| 11 +++
 .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++--
 .../sql/hive/MetastoreDataSourcesSuite.scala|  2 +-
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++-
 .../sql/hive/execution/HiveQuerySuite.scala |  4 +--
 5 files changed, 47 insertions(+), 30 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index a5ccbcf..7e6a352 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -436,11 +436,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
 
   private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
-      if (partCols.nonEmpty) {
+      val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table)
+      val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table)
+      for (schema <- userSpecifiedSchema if partColNames.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
-        append(buffer, s"# ${output.head.name}", "", "")
-        partCols.foreach(col => append(buffer, col, "", ""))
+        append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
+        describeSchema(StructType(partColNames.map(schema(_))), buffer)
       }
     } else {
       if (table.partitionColumns.nonEmpty) {
@@ -527,7 +528,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = {
     schema.foreach { column =>
       val comment =
-        if (column.metadata.contains("comment")) column.metadata.getString("comment") else ""
+        if (column.metadata.contains("comment")) column.metadata.getString("comment") else null
       append(buffer, column.name, column.dataType.simpleString, comment)
     }
   }
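
A note on the `for (schema <- userSpecifiedSchema if partColNames.nonEmpty)` line above: it loops over an Option with a guard, so the partition section is emitted only when a user-specified schema exists and the table actually has partition columns. A minimal standalone sketch of that idiom (plain Scala; the names and the Map-based schema are invented here, they are not Spark APIs):

object OptionGuardSketch extends App {
  // Stand-ins for the values used in describePartitionInfo above.
  val userSpecifiedSchema: Option[Map[String, String]] = Some(Map("a" -> "int", "b" -> "string"))
  val partColNames: Seq[String] = Seq("b")

  // The body runs only if the Option is defined AND the guard holds; otherwise nothing happens.
  for (schema <- userSpecifiedSchema if partColNames.nonEmpty) {
    partColNames.foreach(name => println(s"$name: ${schema(name)}"))  // prints "b: string"
  }
}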

http://git-wip-us.apache.org/repos/asf/spark/blob/3500dbc9/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index 5a7a907..c2aedff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -98,21 +98,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext {
       "describe ddlPeople",
       Seq(
         Row("intType", "int", "test comment test1"),
-        Row("stringType", "string", ""),
-        Row("dateType", "date", ""),
-        Row("timestampType", "timestamp", ""),
-        Row("doubleType", "double", ""),
-        Row("bigintType", "bigint", ""),
-        Row("tinyintType", "tinyint", ""),
-        Row("decimalType", "decimal(10,0)", ""),
-        Row("fixedDecimalType", "decimal(5,1)", ""),
-

spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables

2016-07-26 Thread lian
Repository: spark
Updated Branches:
  refs/heads/master 4c9695598 -> a2abb583c


[SPARK-16663][SQL] desc table should be consistent between data source and hive 
serde tables

## What changes were proposed in this pull request?

Currently there are two inconsistencies:

1. For a data source table we only print the partition column names, while for a hive serde table we also print the partition schema. After this PR we always print the full schema.
2. If a column has no comment, a data source table prints an empty string, while a hive serde table prints null. After this PR we always print null.

## How was this patch tested?

new test in `HiveDDLSuite`

Author: Wenchen Fan 

Closes #14302 from cloud-fan/minor3.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2abb583
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2abb583
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2abb583

Branch: refs/heads/master
Commit: a2abb583caaec9a2cecd5d65b05d172fc096c125
Parents: 4c96955
Author: Wenchen Fan 
Authored: Tue Jul 26 18:46:12 2016 +0800
Committer: Cheng Lian 
Committed: Tue Jul 26 18:46:12 2016 +0800

--
 .../spark/sql/execution/command/tables.scala| 12 
 .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++--
 .../sql/hive/MetastoreDataSourcesSuite.scala|  2 +-
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++-
 .../sql/hive/execution/HiveQuerySuite.scala |  4 +--
 5 files changed, 47 insertions(+), 31 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index c6daa95..8263380 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -439,11 +439,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
 
   private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
-      if (partCols.nonEmpty) {
+      val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table)
+      val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table)
+      for (schema <- userSpecifiedSchema if partColNames.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
-        append(buffer, s"# ${output.head.name}", "", "")
-        partCols.foreach(col => append(buffer, col, "", ""))
+        append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
+        describeSchema(StructType(partColNames.map(schema(_))), buffer)
       }
     } else {
       if (table.partitionColumns.nonEmpty) {
@@ -525,8 +526,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
 
   private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = {
     schema.foreach { column =>
-      val comment = column.getComment().getOrElse("")
-      append(buffer, column.name, column.dataType.simpleString, comment)
+      append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull)
     }
   }
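
The `column.getComment().orNull` change above is what turns a missing comment into null instead of an empty string. A small standalone sketch of the difference (plain Scala, no Spark types involved):

object CommentNullSketch extends App {
  val missing: Option[String] = None
  val present: Option[String] = Some("test comment test1")

  println(missing.getOrElse(""))  // old behavior: prints an empty line
  println(missing.orNull)         // new behavior: prints "null", matching hive serde tables
  println(present.orNull)         // an existing comment is unaffected
}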
 

http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index d0ad319..e535d4d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -97,21 +97,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext {
       "describe ddlPeople",
       Seq(
         Row("intType", "int", "test comment test1"),
-        Row("stringType", "string", ""),
-        Row("dateType", "date", ""),
-        Row("timestampType", "timestamp", ""),
-        Row("doubleType", "double", ""),
-        Row("bigintType", "bigint", ""),
-        Row("tinyintType", "tinyint", ""),
-        Row("decimalType", "decimal(10,0)", ""),
-        Row("fixedDecimalType", "decimal(5,1)", ""),
-        Row("binaryType", "binary", ""),
-        Row("booleanType", "boolean", ""),
-        Row("smallIntType", "smallint",