spark git commit: [SPARK-21150][SQL] Persistent view stored in Hive metastore should be case preserving

lixiao Tue, 20 Jun 2017 09:16:00 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.2 514a7e6f8 -> b8b80f6de



[SPARK-21150][SQL] Persistent view stored in Hive metastore should be case 
preserving

## What changes were proposed in this pull request?

This is a regression in Spark 2.2. In Spark 2.2, we introduced a new way to 
resolve persisted view: https://issues.apache.org/jira/browse/SPARK-18209 , but 
this makes the persisted view non case-preserving because we store the schema 
in hive metastore directly. We should follow data source table and store schema 
in table properties.

## How was this patch tested?

new regression test

Author: Wenchen Fan <wenc...@databricks.com>

Closes #18360 from cloud-fan/view.

(cherry picked from commit e862dc904963cf7832bafc1d3d0ea9090bbddd81)
Signed-off-by: gatorsmile <gatorsm...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b8b80f6d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b8b80f6d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b8b80f6d

Branch: refs/heads/branch-2.2
Commit: b8b80f6dea86d4e4a648b86e38936d3a82ffc0aa
Parents: 514a7e6
Author: Wenchen Fan <wenc...@databricks.com>
Authored: Tue Jun 20 09:15:33 2017 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Tue Jun 20 09:15:41 2017 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/command/views.scala     |  4 +-
 .../spark/sql/execution/SQLViewSuite.scala      | 10 +++
 .../spark/sql/hive/HiveExternalCatalog.scala    | 84 ++++++++++----------
 3 files changed, 56 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b8b80f6d/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
index 00f0aca..3518ee5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
@@ -159,7 +159,9 @@ case class CreateViewCommand(
         checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent)
 
         // Handles `CREATE OR REPLACE VIEW v0 AS SELECT ...`
-        catalog.alterTable(prepareTable(sparkSession, analyzedPlan))
+        // Nothing we need to retain from the old view, so just drop and 
create a new one
+        catalog.dropTable(viewIdent, ignoreIfNotExists = false, purge = false)
+        catalog.createTable(prepareTable(sparkSession, analyzedPlan), 
ignoreIfExists = false)
       } else {
         // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the 
target view already
         // exists.

http://git-wip-us.apache.org/repos/asf/spark/blob/b8b80f6d/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
index d32716c..6761f05 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala
@@ -669,4 +669,14 @@ abstract class SQLViewSuite extends QueryTest with 
SQLTestUtils {
         "positive."))
     }
   }
+
+  test("permanent view should be case-preserving") {
+    withView("v") {
+      sql("CREATE VIEW v AS SELECT 1 as aBc")
+      assert(spark.table("v").schema.head.name == "aBc")
+
+      sql("CREATE OR REPLACE VIEW v AS SELECT 2 as cBa")
+      assert(spark.table("v").schema.head.name == "cBa")
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b8b80f6d/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index a03beb7..f2fe227 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -224,39 +224,36 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
       throw new TableAlreadyExistsException(db = db, table = table)
     }
 
-    if (tableDefinition.tableType == VIEW) {
-      client.createTable(tableDefinition, ignoreIfExists)
+    // Ideally we should not create a managed table with location, but Hive 
serde table can
+    // specify location for managed table. And in 
[[CreateDataSourceTableAsSelectCommand]] we have
+    // to create the table directory and write out data before we create this 
table, to avoid
+    // exposing a partial written table.
+    val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
+      tableDefinition.storage.locationUri.isEmpty
+
+    val tableLocation = if (needDefaultTableLocation) {
+      
Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
     } else {
-      // Ideally we should not create a managed table with location, but Hive 
serde table can
-      // specify location for managed table. And in 
[[CreateDataSourceTableAsSelectCommand]] we have
-      // to create the table directory and write out data before we create 
this table, to avoid
-      // exposing a partial written table.
-      val needDefaultTableLocation = tableDefinition.tableType == MANAGED &&
-        tableDefinition.storage.locationUri.isEmpty
-
-      val tableLocation = if (needDefaultTableLocation) {
-        
Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier)))
-      } else {
-        tableDefinition.storage.locationUri
-      }
+      tableDefinition.storage.locationUri
+    }
 
-      if (DDLUtils.isHiveTable(tableDefinition)) {
-        val tableWithDataSourceProps = tableDefinition.copy(
-          // We can't leave `locationUri` empty and count on Hive metastore to 
set a default table
-          // location, because Hive metastore uses 
hive.metastore.warehouse.dir to generate default
-          // table location for tables in default database, while we expect to 
use the location of
-          // default database.
-          storage = tableDefinition.storage.copy(locationUri = tableLocation),
-          // Here we follow data source tables and put table metadata like 
table schema, partition
-          // columns etc. in table properties, so that we can work around the 
Hive metastore issue
-          // about not case preserving and make Hive serde table support 
mixed-case column names.
-          properties = tableDefinition.properties ++ 
tableMetaToTableProps(tableDefinition))
-        client.createTable(tableWithDataSourceProps, ignoreIfExists)
-      } else {
-        createDataSourceTable(
-          tableDefinition.withNewStorage(locationUri = tableLocation),
-          ignoreIfExists)
-      }
+    if (DDLUtils.isDatasourceTable(tableDefinition)) {
+      createDataSourceTable(
+        tableDefinition.withNewStorage(locationUri = tableLocation),
+        ignoreIfExists)
+    } else {
+      val tableWithDataSourceProps = tableDefinition.copy(
+        // We can't leave `locationUri` empty and count on Hive metastore to 
set a default table
+        // location, because Hive metastore uses hive.metastore.warehouse.dir 
to generate default
+        // table location for tables in default database, while we expect to 
use the location of
+        // default database.
+        storage = tableDefinition.storage.copy(locationUri = tableLocation),
+        // Here we follow data source tables and put table metadata like table 
schema, partition
+        // columns etc. in table properties, so that we can work around the 
Hive metastore issue
+        // about not case preserving and make Hive serde table and view 
support mixed-case column
+        // names.
+        properties = tableDefinition.properties ++ 
tableMetaToTableProps(tableDefinition))
+      client.createTable(tableWithDataSourceProps, ignoreIfExists)
     }
   }
 
@@ -669,16 +666,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
 
     var table = inputTable
 
-    if (table.tableType != VIEW) {
-      table.properties.get(DATASOURCE_PROVIDER) match {
-        // No provider in table properties, which means this is a Hive serde 
table.
-        case None =>
-          table = restoreHiveSerdeTable(table)
-
-        // This is a regular data source table.
-        case Some(provider) =>
-          table = restoreDataSourceTable(table, provider)
-      }
+    table.properties.get(DATASOURCE_PROVIDER) match {
+      case None if table.tableType == VIEW =>
+        // If this is a view created by Spark 2.2 or higher versions, we 
should restore its schema
+        // from table properties.
+        if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) {
+          table = table.copy(schema = getSchemaFromTableProperties(table))
+        }
+
+      // No provider in table properties, which means this is a Hive serde 
table.
+      case None =>
+        table = restoreHiveSerdeTable(table)
+
+      // This is a regular data source table.
+      case Some(provider) =>
+        table = restoreDataSourceTable(table, provider)
     }
 
     // construct Spark's statistics from information in Hive metastore


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-21150][SQL] Persistent view stored in Hive metastore should be case preserving

Reply via email to