Repository: spark
Updated Branches:
  refs/heads/master fb2e8eeb0 -> c5ec87982


[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema

#### What changes were proposed in this pull request?
If we create a table pointing to a parquet/json dataset without specifying the 
schema, the describe table command does not show the schema at all; it only 
shows `# Schema of this table is inferred at runtime`. In Spark 1.6, describe 
table did show the schema of such a table.
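
A minimal repro sketch, assuming a spark-shell session; the path and table name below are illustrative (they mirror the new test at the bottom of this diff):

```scala
// Illustrative repro in spark-shell; path and table name are hypothetical.
val path = "/tmp/spark-16482-demo"
spark.range(1).write.parquet(path)                            // data written without an explicit schema
sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")  // no user-specified schema
sql("DESC t1").show(truncate = false)
// Before this patch: # Schema of this table is inferred at runtime
// After this patch:  id  bigint
```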

~~For data source tables, inferring the schema requires loading the table at 
runtime. Thus, this PR calls the function `lookupRelation`.~~

For data source tables, we infer the schema before table creation. Thus, this 
PR sets the inferred schema as the table schema at table creation time.
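
Condensed from the diff below, `DescribeTableCommand` now resolves the schema in this order (this is an excerpt, not self-contained: `metadata`, `catalog`, `table`, `result`, and `describeSchema` come from the surrounding command):

```scala
// Schema resolution in DescribeTableCommand, condensed from the diff below.
if (DDLUtils.isDatasourceTable(metadata)) {
  DDLUtils.getSchemaFromTableProperties(metadata) match {
    // Normal case after this patch: the (user-specified or inferred) schema
    // was persisted into the table properties at creation time.
    case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
    // Fallback, presumably for tables created before the schema was
    // persisted: load the relation and infer the schema at lookup time.
    case None => describeSchema(catalog.lookupRelation(table).schema, result)
  }
} else {
  // Non-data-source (e.g. Hive) tables carry their schema in the metadata.
  describeSchema(metadata.schema, result)
}
```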

#### How was this patch tested?
Added test cases

Author: gatorsmile <gatorsm...@gmail.com>

Closes #14148 from gatorsmile/describeSchema.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5ec8798
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5ec8798
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5ec8798

Branch: refs/heads/master
Commit: c5ec879828369ec1d21acd7f18a792306634ff74
Parents: fb2e8ee
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Wed Jul 13 15:23:37 2016 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Wed Jul 13 15:23:37 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    | 28 +++++++++-----------
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 16 ++++++-----
 2 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c5ec8798/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 5c815df..6651c33 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -413,29 +413,29 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
     } else {
       val metadata = catalog.getTableMetadata(table)
 
+      if (DDLUtils.isDatasourceTable(metadata)) {
+        DDLUtils.getSchemaFromTableProperties(metadata) match {
+          case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
+          case None => describeSchema(catalog.lookupRelation(table).schema, result)
+        }
+      } else {
+        describeSchema(metadata.schema, result)
+      }
+
       if (isExtended) {
         describeExtended(metadata, result)
       } else if (isFormatted) {
         describeFormatted(metadata, result)
       } else {
-        describe(metadata, result)
+        describePartitionInfo(metadata, result)
       }
     }
 
     result
   }
 
-  // Shows data columns and partitioned columns (if any)
-  private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
+  private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val schema = DDLUtils.getSchemaFromTableProperties(table)
-
-      if (schema.isEmpty) {
-        append(buffer, "# Schema of this table is inferred at runtime", "", "")
-      } else {
-        schema.foreach(describeSchema(_, buffer))
-      }
-
       val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
       if (partCols.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
@@ -443,8 +443,6 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
         partCols.foreach(col => append(buffer, col, "", ""))
       }
     } else {
-      describeSchema(table.schema, buffer)
-
       if (table.partitionColumns.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   }
 
   private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)
 
     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", table.toString, "")
   }
 
   private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)
 
     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", "", "")

http://git-wip-us.apache.org/repos/asf/spark/blob/c5ec8798/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 343d7ba..9228242 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -612,15 +612,17 @@ class HiveDDLSuite
   }
 
   test("desc table for data source table - no user-defined schema") {
-    withTable("t1") {
-      withTempPath { dir =>
-        val path = dir.getCanonicalPath
-        spark.range(1).write.parquet(path)
-        sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")
+    Seq("parquet", "json", "orc").foreach { fileFormat =>
+      withTable("t1") {
+        withTempPath { dir =>
+          val path = dir.getCanonicalPath
+          spark.range(1).write.format(fileFormat).save(path)
+          sql(s"CREATE TABLE t1 USING $fileFormat OPTIONS (PATH '$path')")
 
-        val desc = sql("DESC FORMATTED t1").collect().toSeq
+          val desc = sql("DESC FORMATTED t1").collect().toSeq
 
-        assert(desc.contains(Row("# Schema of this table is inferred at runtime", "", "")))
+          assert(desc.contains(Row("id", "bigint", "")))
+        }
       }
     }
   }

