Repository: spark
Updated Branches:
  refs/heads/master b40546651 -> 7c7fc30b4


http://git-wip-us.apache.org/repos/asf/spark/blob/7c7fc30b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index e2fcd2f..962998e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -23,7 +23,7 @@ import scala.reflect.ClassTag
 
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.CatalogStatistics
+import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics}
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.sql.execution.joins._
@@ -33,52 +33,46 @@ import org.apache.spark.sql.types._
 
 class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton {
 
-  test("MetastoreRelations fallback to HDFS for size estimation") {
-    val enableFallBackToHdfsForStats = spark.sessionState.conf.fallBackToHdfsForStatsEnabled
-    try {
-      withTempDir { tempDir =>
-
-        // EXTERNAL OpenCSVSerde table pointing to LOCATION
-
-        val file1 = new File(tempDir + "/data1")
-        val writer1 = new PrintWriter(file1)
-        writer1.write("1,2")
-        writer1.close()
-
-        val file2 = new File(tempDir + "/data2")
-        val writer2 = new PrintWriter(file2)
-        writer2.write("1,2")
-        writer2.close()
-
-        sql(
-          s"""CREATE EXTERNAL TABLE csv_table(page_id INT, impressions INT)
-            ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
-            WITH SERDEPROPERTIES (
-              \"separatorChar\" = \",\",
-              \"quoteChar\"     = \"\\\"\",
-              \"escapeChar\"    = \"\\\\\")
-            LOCATION '${tempDir.toURI}'
-          """)
-
-        spark.conf.set(SQLConf.ENABLE_FALL_BACK_TO_HDFS_FOR_STATS.key, true)
-
-        val relation = spark.table("csv_table").queryExecution.analyzed.children.head
-          .asInstanceOf[MetastoreRelation]
-
-        val properties = relation.hiveQlTable.getParameters
-        assert(properties.get("totalSize").toLong <= 0, "external table 
totalSize must be <= 0")
-        assert(properties.get("rawDataSize").toLong <= 0, "external table 
rawDataSize must be <= 0")
-
-        val sizeInBytes = relation.stats(conf).sizeInBytes
-        assert(sizeInBytes === BigInt(file1.length() + file2.length()))
+  test("Hive serde tables should fallback to HDFS for size estimation") {
+    withSQLConf(SQLConf.ENABLE_FALL_BACK_TO_HDFS_FOR_STATS.key -> "true") {
+      withTable("csv_table") {
+        withTempDir { tempDir =>
+          // EXTERNAL OpenCSVSerde table pointing to LOCATION
+          val file1 = new File(tempDir + "/data1")
+          val writer1 = new PrintWriter(file1)
+          writer1.write("1,2")
+          writer1.close()
+
+          val file2 = new File(tempDir + "/data2")
+          val writer2 = new PrintWriter(file2)
+          writer2.write("1,2")
+          writer2.close()
+
+          sql(
+            s"""
+               |CREATE EXTERNAL TABLE csv_table(page_id INT, impressions INT)
+               |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+               |WITH SERDEPROPERTIES (
+               |\"separatorChar\" = \",\",
+               |\"quoteChar\"     = \"\\\"\",
+               |\"escapeChar\"    = \"\\\\\")
+               |LOCATION '${tempDir.toURI}'""".stripMargin)
+
+          val relation = spark.table("csv_table").queryExecution.analyzed.children.head
+            .asInstanceOf[CatalogRelation]
+
+          val properties = relation.tableMeta.properties
+          assert(properties("totalSize").toLong <= 0, "external table 
totalSize must be <= 0")
+          assert(properties("rawDataSize").toLong <= 0, "external table 
rawDataSize must be <= 0")
+
+          val sizeInBytes = relation.stats(conf).sizeInBytes
+          assert(sizeInBytes === BigInt(file1.length() + file2.length()))
+        }
       }
-    } finally {
-      spark.conf.set(SQLConf.ENABLE_FALL_BACK_TO_HDFS_FOR_STATS.key, enableFallBackToHdfsForStats)
-      sql("DROP TABLE csv_table ")
     }
   }
 
-  test("analyze MetastoreRelations") {
+  test("analyze Hive serde tables") {
     def queryTotalSize(tableName: String): BigInt =
       spark.table(tableName).queryExecution.analyzed.stats(conf).sizeInBytes
 
@@ -152,9 +146,11 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
   }
 
   private def checkTableStats(
-      stats: Option[CatalogStatistics],
+      tableName: String,
       hasSizeInBytes: Boolean,
-      expectedRowCounts: Option[Int]): Unit = {
+      expectedRowCounts: Option[Int]): Option[CatalogStatistics] = {
+    val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).stats
+
     if (hasSizeInBytes || expectedRowCounts.nonEmpty) {
       assert(stats.isDefined)
       assert(stats.get.sizeInBytes > 0)
@@ -162,26 +158,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
     } else {
       assert(stats.isEmpty)
     }
-  }
 
-  private def checkTableStats(
-      tableName: String,
-      isDataSourceTable: Boolean,
-      hasSizeInBytes: Boolean,
-      expectedRowCounts: Option[Int]): Option[CatalogStatistics] = {
-    val df = sql(s"SELECT * FROM $tableName")
-    val stats = df.queryExecution.analyzed.collect {
-      case rel: MetastoreRelation =>
-        checkTableStats(rel.catalogTable.stats, hasSizeInBytes, expectedRowCounts)
-        assert(!isDataSourceTable, "Expected a Hive serde table, but got a data source table")
-        rel.catalogTable.stats
-      case rel: LogicalRelation =>
-        checkTableStats(rel.catalogTable.get.stats, hasSizeInBytes, expectedRowCounts)
-        assert(isDataSourceTable, "Expected a data source table, but got a Hive serde table")
-        rel.catalogTable.get.stats
-    }
-    assert(stats.size == 1)
-    stats.head
+    stats
   }
 
   test("test table-level statistics for hive tables created in 
HiveExternalCatalog") {
@@ -192,25 +170,23 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
       sql(s"CREATE TABLE $textTable (key STRING, value STRING) STORED AS 
TEXTFILE")
       checkTableStats(
         textTable,
-        isDataSourceTable = false,
         hasSizeInBytes = false,
         expectedRowCounts = None)
       sql(s"INSERT INTO TABLE $textTable SELECT * FROM src")
       checkTableStats(
         textTable,
-        isDataSourceTable = false,
         hasSizeInBytes = false,
         expectedRowCounts = None)
 
       // noscan won't count the number of rows
       sql(s"ANALYZE TABLE $textTable COMPUTE STATISTICS noscan")
-      val fetchedStats1 = checkTableStats(
-        textTable, isDataSourceTable = false, hasSizeInBytes = true, expectedRowCounts = None)
+      val fetchedStats1 =
+        checkTableStats(textTable, hasSizeInBytes = true, expectedRowCounts = None)
 
       // without noscan, we count the number of rows
       sql(s"ANALYZE TABLE $textTable COMPUTE STATISTICS")
-      val fetchedStats2 = checkTableStats(
-        textTable, isDataSourceTable = false, hasSizeInBytes = true, expectedRowCounts = Some(500))
+      val fetchedStats2 =
+        checkTableStats(textTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
       assert(fetchedStats1.get.sizeInBytes == fetchedStats2.get.sizeInBytes)
     }
   }
@@ -221,25 +197,25 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
       sql(s"CREATE TABLE $textTable (key STRING, value STRING) STORED AS 
TEXTFILE")
       sql(s"INSERT INTO TABLE $textTable SELECT * FROM src")
       sql(s"ANALYZE TABLE $textTable COMPUTE STATISTICS")
-      val fetchedStats1 = checkTableStats(
-        textTable, isDataSourceTable = false, hasSizeInBytes = true, expectedRowCounts = Some(500))
+      val fetchedStats1 =
+        checkTableStats(textTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
 
       sql(s"ANALYZE TABLE $textTable COMPUTE STATISTICS noscan")
       // when the total size is not changed, the old row count is kept
-      val fetchedStats2 = checkTableStats(
-        textTable, isDataSourceTable = false, hasSizeInBytes = true, expectedRowCounts = Some(500))
+      val fetchedStats2 =
+        checkTableStats(textTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
       assert(fetchedStats1 == fetchedStats2)
 
       sql(s"INSERT INTO TABLE $textTable SELECT * FROM src")
       sql(s"ANALYZE TABLE $textTable COMPUTE STATISTICS noscan")
       // update total size and remove the old and invalid row count
-      val fetchedStats3 = checkTableStats(
-        textTable, isDataSourceTable = false, hasSizeInBytes = true, expectedRowCounts = None)
+      val fetchedStats3 =
+        checkTableStats(textTable, hasSizeInBytes = true, expectedRowCounts = None)
       assert(fetchedStats3.get.sizeInBytes > fetchedStats2.get.sizeInBytes)
     }
   }
 
-  test("test statistics of LogicalRelation converted from MetastoreRelation") {
+  test("test statistics of LogicalRelation converted from Hive serde tables") {
     val parquetTable = "parquetTable"
     val orcTable = "orcTable"
     withTable(parquetTable, orcTable) {
@@ -251,21 +227,14 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
       // the default value for `spark.sql.hive.convertMetastoreParquet` is true, here we just set it
       // for robustness
       withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "true") {
-        checkTableStats(
-          parquetTable, isDataSourceTable = true, hasSizeInBytes = false, expectedRowCounts = None)
+        checkTableStats(parquetTable, hasSizeInBytes = false, expectedRowCounts = None)
         sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS")
-        checkTableStats(
-          parquetTable,
-          isDataSourceTable = true,
-          hasSizeInBytes = true,
-          expectedRowCounts = Some(500))
+        checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
       }
       withSQLConf("spark.sql.hive.convertMetastoreOrc" -> "true") {
-        checkTableStats(
-          orcTable, isDataSourceTable = true, hasSizeInBytes = false, expectedRowCounts = None)
+        checkTableStats(orcTable, hasSizeInBytes = false, expectedRowCounts = None)
         sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS")
-        checkTableStats(
-          orcTable, isDataSourceTable = true, hasSizeInBytes = true, expectedRowCounts = Some(500))
+        checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
       }
     }
   }
@@ -385,27 +354,23 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
 
         // Add a filter to avoid creating too many partitions
         sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src WHERE key < 
10")
-        checkTableStats(
-          parquetTable, isDataSourceTable = true, hasSizeInBytes = false, expectedRowCounts = None)
+        checkTableStats(parquetTable, hasSizeInBytes = false, expectedRowCounts = None)
 
         // noscan won't count the number of rows
         sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS noscan")
-        val fetchedStats1 = checkTableStats(
-          parquetTable, isDataSourceTable = true, hasSizeInBytes = true, expectedRowCounts = None)
+        val fetchedStats1 =
+          checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = None)
 
         sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src WHERE key < 
10")
         sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS noscan")
-        val fetchedStats2 = checkTableStats(
-          parquetTable, isDataSourceTable = true, hasSizeInBytes = true, expectedRowCounts = None)
+        val fetchedStats2 =
+          checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = None)
         assert(fetchedStats2.get.sizeInBytes > fetchedStats1.get.sizeInBytes)
 
         // without noscan, we count the number of rows
         sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS")
-        val fetchedStats3 = checkTableStats(
-          parquetTable,
-          isDataSourceTable = true,
-          hasSizeInBytes = true,
-          expectedRowCounts = Some(20))
+        val fetchedStats3 =
+          checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = Some(20))
         assert(fetchedStats3.get.sizeInBytes == fetchedStats2.get.sizeInBytes)
       }
     }
@@ -426,11 +391,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
       val dfNoCols = spark.createDataFrame(rddNoCols, StructType(Seq.empty))
       dfNoCols.write.format("json").saveAsTable(table_no_cols)
       sql(s"ANALYZE TABLE $table_no_cols COMPUTE STATISTICS")
-      checkTableStats(
-        table_no_cols,
-        isDataSourceTable = true,
-        hasSizeInBytes = true,
-        expectedRowCounts = Some(10))
+      checkTableStats(table_no_cols, hasSizeInBytes = true, expectedRowCounts = Some(10))
     }
   }
 
@@ -478,10 +439,10 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
     assert(statsAfterUpdate.rowCount == Some(2))
   }
 
-  test("estimates the size of a test MetastoreRelation") {
+  test("estimates the size of a test Hive serde tables") {
     val df = sql("""SELECT * FROM src""")
-    val sizes = df.queryExecution.analyzed.collect { case mr: MetastoreRelation =>
-      mr.stats(conf).sizeInBytes
+    val sizes = df.queryExecution.analyzed.collect {
+      case relation: CatalogRelation => relation.stats(conf).sizeInBytes
     }
     assert(sizes.size === 1, s"Size wrong for:\n ${df.queryExecution}")
     assert(sizes(0).equals(BigInt(5812)),
@@ -533,7 +494,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
       after()
     }
 
-    /** Tests for MetastoreRelation */
+    /** Tests for Hive serde tables */
     val metastoreQuery = """SELECT * FROM src a JOIN src b ON a.key = 238 AND a.key = b.key"""
     val metastoreAnswer = Seq.fill(4)(Row(238, "val_238", 238, "val_238"))
     mkTest(
@@ -541,7 +502,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
       () => (),
       metastoreQuery,
       metastoreAnswer,
-      implicitly[ClassTag[MetastoreRelation]]
+      implicitly[ClassTag[CatalogRelation]]
     )
   }
 
@@ -555,9 +516,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
 
     // Assert src has a size smaller than the threshold.
     val sizes = df.queryExecution.analyzed.collect {
-      case r if implicitly[ClassTag[MetastoreRelation]].runtimeClass
-        .isAssignableFrom(r.getClass) =>
-        r.stats(conf).sizeInBytes
+      case relation: CatalogRelation => relation.stats(conf).sizeInBytes
     }
     assert(sizes.size === 2 && sizes(1) <= spark.sessionState.conf.autoBroadcastJoinThreshold
       && sizes(0) <= spark.sessionState.conf.autoBroadcastJoinThreshold,

http://git-wip-us.apache.org/repos/asf/spark/blob/7c7fc30b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index f3151d5..536ca8f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -385,7 +385,7 @@ abstract class HiveComparisonTest
               // also print out the query plans and results for those.
               val computedTablesMessages: String = try {
                 val tablesRead = new TestHiveQueryExecution(query).executedPlan.collect {
-                  case ts: HiveTableScanExec => ts.relation.tableName
+                  case ts: HiveTableScanExec => ts.relation.tableMeta.identifier
                 }.toSet
 
                 TestHive.reset()
@@ -393,7 +393,7 @@ abstract class HiveComparisonTest
                 executions.foreach(_.toRdd)
                 val tablesGenerated = queryList.zip(executions).flatMap {
                   case (q, e) => e.analyzed.collect {
-                    case i: InsertIntoHiveTable if tablesRead contains i.table.tableName =>
+                    case i: InsertIntoHiveTable if tablesRead contains i.table.identifier =>
                       (q, e, i)
                   }
                 }

http://git-wip-us.apache.org/repos/asf/spark/blob/7c7fc30b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 5c460d2..90e037e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.hive.MetastoreRelation
 import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
@@ -95,8 +94,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH
   private def checkNumScannedPartitions(stmt: String, expectedNumParts: Int): Unit = {
     val plan = sql(stmt).queryExecution.sparkPlan
     val numPartitions = plan.collectFirst {
-      case p: HiveTableScanExec =>
-        p.relation.getHiveQlPartitions(p.partitionPruningPred).length
+      case p: HiveTableScanExec => p.rawPartitions.length
     }.getOrElse(0)
     assert(numPartitions == expectedNumParts)
   }
@@ -170,11 +168,11 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH
           s"""
              |SELECT * FROM $table
            """.stripMargin).queryExecution.sparkPlan
-        val relation = plan.collectFirst {
-          case p: HiveTableScanExec => p.relation
+        val scan = plan.collectFirst {
+          case p: HiveTableScanExec => p
         }.get
-        val tableCols = relation.hiveQlTable.getCols
-        relation.getHiveQlPartitions().foreach(p => assert(p.getCols.size == tableCols.size))
+        val numDataCols = scan.relation.dataCols.length
+        scan.rawPartitions.foreach(p => assert(p.getCols.size == numDataCols))
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7c7fc30b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
index 24df73b..d535bef 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala
@@ -153,8 +153,8 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter {
       val (actualScannedColumns, actualPartValues) = plan.collect {
         case p @ HiveTableScanExec(columns, relation, _) =>
           val columnNames = columns.map(_.name)
-          val partValues = if (relation.catalogTable.partitionColumnNames.nonEmpty) {
-            p.prunePartitions(relation.getHiveQlPartitions()).map(_.getValues)
+          val partValues = if (relation.isPartitioned) {
+            p.prunePartitions(p.rawPartitions).map(_.getValues)
           } else {
             Seq.empty
           }

http://git-wip-us.apache.org/repos/asf/spark/blob/7c7fc30b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 9f61763..ef2d451 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -28,12 +28,12 @@ import org.apache.spark.TestUtils
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, FunctionRegistry, NoSuchPartitionException}
-import org.apache.spark.sql.catalyst.catalog.CatalogTableType
+import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTableType}
 import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
 import org.apache.spark.sql.functions._
-import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}
+import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
@@ -526,7 +526,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       case LogicalRelation(r: HadoopFsRelation, _, _) =>
         if (!isDataSourceTable) {
           fail(
-            s"${classOf[MetastoreRelation].getCanonicalName} is expected, but 
found " +
+            s"${classOf[CatalogRelation].getCanonicalName} is expected, but 
found " +
               s"${HadoopFsRelation.getClass.getCanonicalName}.")
         }
         userSpecifiedLocation match {
@@ -536,15 +536,15 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         }
         assert(catalogTable.provider.get === format)
 
-      case r: MetastoreRelation =>
+      case r: CatalogRelation =>
         if (isDataSourceTable) {
           fail(
             s"${HadoopFsRelation.getClass.getCanonicalName} is expected, but 
found " +
-              s"${classOf[MetastoreRelation].getCanonicalName}.")
+              s"${classOf[CatalogRelation].getCanonicalName}.")
         }
         userSpecifiedLocation match {
           case Some(location) =>
-            assert(r.catalogTable.location === location)
+            assert(r.tableMeta.location === location)
           case None => // OK.
         }
         // Also make sure that the format and serde are as desired.
@@ -1030,7 +1030,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     withSQLConf(SQLConf.CONVERT_CTAS.key -> "false") {
       sql("CREATE TABLE explodeTest (key bigInt)")
       table("explodeTest").queryExecution.analyzed match {
-        case SubqueryAlias(_, r: MetastoreRelation, _) => // OK
+        case SubqueryAlias(_, r: CatalogRelation, _) => // OK
         case _ =>
           fail("To correctly test the fix of SPARK-5875, explodeTest should be 
a MetastoreRelation")
       }
@@ -2043,4 +2043,18 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       }
     }
   }
+
+  test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More 
Than Once") {
+    withTable("bar") {
+      withTempView("foo") {
+        sql("select 0 as id").createOrReplaceTempView("foo")
+        // If we optimize the query in CTAS more than once, the following saveAsTable will fail
+        // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
+        sql("SELECT * FROM foo group by id").toDF().write.format("hive").saveAsTable("bar")
+        checkAnswer(spark.table("bar"), Row(0) :: Nil)
+        val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
+        assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table")
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/7c7fc30b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index 9fa1fb9..38a5477 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -26,8 +26,9 @@ import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.CatalogRelation
 import org.apache.spark.sql.execution.datasources.{LogicalRelation, RecordReaderIterator}
-import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}
+import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
 import org.apache.spark.sql.internal.SQLConf
@@ -473,7 +474,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
                 }
               } else {
                 queryExecution.analyzed.collectFirst {
-                  case _: MetastoreRelation => ()
+                  case _: CatalogRelation => ()
                 }.getOrElse {
                   fail(s"Expecting no conversion from orc to data sources, " +
                     s"but got:\n$queryExecution")

http://git-wip-us.apache.org/repos/asf/spark/blob/7c7fc30b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index 1a1b257..3512c4a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -21,8 +21,8 @@ import java.io.File
 
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.CatalogRelation
 import org.apache.spark.sql.execution.DataSourceScanExec
-import org.apache.spark.sql.execution.command.ExecutedCommandExec
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.execution.HiveTableScanExec
 import org.apache.spark.sql.hive.test.TestHiveSingleton
@@ -806,7 +806,7 @@ class ParquetSourceSuite extends ParquetPartitioningTest {
               }
             } else {
               queryExecution.analyzed.collectFirst {
-                case _: MetastoreRelation =>
+                case _: CatalogRelation =>
               }.getOrElse {
                 fail(s"Expecting no conversion from parquet to data sources, " 
+
                   s"but got:\n$queryExecution")

