Repository: spark
Updated Branches:
  refs/heads/master d0d28507c -> ec18cd0af


[SPARK-16389][SQL] Remove MetastoreRelation from SparkHiveWriterContainer and SparkHiveDynamicPartitionWriterContainer

#### What changes were proposed in this pull request?
- Remove the unneeded `MetastoreRelation` parameter from the constructor signatures of `SparkHiveWriterContainer` and `SparkHiveDynamicPartitionWriterContainer`.
- Avoid an unnecessary metadata lookup through the Hive client in `InsertIntoHiveTable` by reusing the already-resolved `CatalogTable` (see the sketch below).
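
For illustration, here is a minimal, self-contained Scala sketch of the second point. `CatalogTableStub` and `QualifiedNameSketch` are hypothetical names used only for this example and are not part of the patch; the idea is simply that the qualified `db.table` name already lives on the resolved catalog table, so it does not have to be rebuilt by hand or fetched again through the Hive client.

```scala
// Hypothetical stand-in for a resolved catalog table: it already knows its
// database and table name, so the qualified name can be derived locally
// without another metastore call.
case class CatalogTableStub(database: String, table: String) {
  def qualifiedName: String = s"$database.$table"
}

object QualifiedNameSketch {
  def main(args: Array[String]): Unit = {
    val catalogTable = CatalogTableStub("default", "sales")

    // Before the patch: the name was assembled by hand from the relation.
    val handBuilt = s"${catalogTable.database}.${catalogTable.table}"

    // After the patch: reuse the name the resolved catalog table carries.
    val fromCatalog = catalogTable.qualifiedName

    assert(handBuilt == fromCatalog)
    println(fromCatalog) // prints "default.sales"
  }
}
```

The same simplification drives the constructor change: once the writer containers take only what they actually use (the job conf, the file sink descriptor, and the child output attributes), the `MetastoreRelation` parameter can be dropped.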

#### How was this patch tested?
The existing test cases already cover these changes.

Author: gatorsmile <gatorsm...@gmail.com>

Closes #14062 from gatorsmile/removeMetastoreRelation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ec18cd0a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ec18cd0a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ec18cd0a

Branch: refs/heads/master
Commit: ec18cd0af497d170bdcec345d845d925fb2880cf
Parents: d0d2850
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Wed Jul 6 12:09:53 2016 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Wed Jul 6 12:09:53 2016 +0800

----------------------------------------------------------------------
 .../sql/hive/execution/InsertIntoHiveTable.scala    | 16 ++++++----------
 .../spark/sql/hive/hiveWriterContainers.scala       |  8 +++-----
 2 files changed, 9 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ec18cd0a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 3d58d49..eb0c31c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -223,22 +223,18 @@ case class InsertIntoHiveTable(
         jobConf,
         fileSinkConf,
         dynamicPartColNames,
-        child.output,
-        table)
+        child.output)
     } else {
       new SparkHiveWriterContainer(
         jobConf,
         fileSinkConf,
-        child.output,
-        table)
+        child.output)
     }
 
     @transient val outputClass = writerContainer.newSerializer(table.tableDesc).getSerializedClass
     saveAsHiveFile(child.execute(), outputClass, fileSinkConf, jobConfSer, writerContainer)
 
     val outputPath = FileOutputFormat.getOutputPath(jobConf)
-    // Have to construct the format of dbname.tablename.
-    val qualifiedTableName = s"${table.databaseName}.${table.tableName}"
     // TODO: Correctly set holdDDLTime.
     // In most of the time, we should have holdDDLTime = false.
     // holdDDLTime will be true when TOK_HOLD_DDLTIME presents in the query as a hint.
@@ -260,7 +256,7 @@ case class InsertIntoHiveTable(
         client.synchronized {
           client.loadDynamicPartitions(
             outputPath.toString,
-            qualifiedTableName,
+            table.catalogTable.qualifiedName,
             orderedPartitionSpec,
             overwrite,
             numDynamicPartitions,
@@ -274,13 +270,13 @@ case class InsertIntoHiveTable(
         // scalastyle:on
         val oldPart =
           client.getPartitionOption(
-            client.getTable(table.databaseName, table.tableName),
+            table.catalogTable,
             partitionSpec)
 
         if (oldPart.isEmpty || !ifNotExists) {
             client.loadPartition(
               outputPath.toString,
-              qualifiedTableName,
+              table.catalogTable.qualifiedName,
               orderedPartitionSpec,
               overwrite,
               holdDDLTime,
@@ -291,7 +287,7 @@ case class InsertIntoHiveTable(
     } else {
       client.loadTable(
         outputPath.toString, // TODO: URI
-        qualifiedTableName,
+        table.catalogTable.qualifiedName,
         overwrite,
         holdDDLTime)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/ec18cd0a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala
index e65c24e..ea88276 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala
@@ -53,8 +53,7 @@ import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter
 private[hive] class SparkHiveWriterContainer(
     @transient private val jobConf: JobConf,
     fileSinkConf: FileSinkDesc,
-    inputSchema: Seq[Attribute],
-    table: MetastoreRelation)
+    inputSchema: Seq[Attribute])
   extends Logging
   with HiveInspectors
   with Serializable {
@@ -217,9 +216,8 @@ private[spark] class SparkHiveDynamicPartitionWriterContainer(
     jobConf: JobConf,
     fileSinkConf: FileSinkDesc,
     dynamicPartColNames: Array[String],
-    inputSchema: Seq[Attribute],
-    table: MetastoreRelation)
-  extends SparkHiveWriterContainer(jobConf, fileSinkConf, inputSchema, table) {
+    inputSchema: Seq[Attribute])
+  extends SparkHiveWriterContainer(jobConf, fileSinkConf, inputSchema) {
 
   import SparkHiveDynamicPartitionWriterContainer._
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
