Repository: spark
Updated Branches:
  refs/heads/branch-1.0 a4c3a8069 -> 58d06846f


[SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once.

Author: Michael Armbrust <mich...@databricks.com>

Closes #1129 from marmbrus/doubleCreateAs and squashes the following commits:

9c6d9e4 [Michael Armbrust] Fix typo.
5128fe2 [Michael Armbrust] Make sure InsertIntoHiveTable doesn't execute each 
time you ask for its result.

(cherry picked from commit 777c5958c4088182f9e2daba435ccb413a2f69d7)
Signed-off-by: Reynold Xin <r...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/58d06846
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/58d06846
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/58d06846

Branch: refs/heads/branch-1.0
Commit: 58d06846f06726a9a6ce10995fe71155eae880bf
Parents: a4c3a80
Author: Michael Armbrust <mich...@databricks.com>
Authored: Thu Jun 19 14:14:03 2014 -0700
Committer: Reynold Xin <r...@apache.org>
Committed: Thu Jun 19 14:14:21 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/execution/hiveOperators.scala    | 6 +++++-
 .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala   | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/58d06846/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
index a839231..240aa0d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
@@ -344,12 +344,16 @@ case class InsertIntoHiveTable(
     writer.commitJob()
   }
 
+  override def execute() = result
+
   /**
   * Inserts all the rows in the table into Hive.  Row objects are properly serialized with the
    * `org.apache.hadoop.hive.serde2.SerDe` and the
    * `org.apache.hadoop.mapred.OutputFormat` provided by the table definition.
+   *
+   * Note: this is run once and then kept to avoid double insertions.
    */
-  def execute() = {
+  private lazy val result: RDD[Row] = {
     val childRdd = child.execute()
     assert(childRdd != null)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/58d06846/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 8b2bdd5..5118f4b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -28,6 +28,12 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row}
  */
 class HiveQuerySuite extends HiveComparisonTest {
 
+  test("CREATE TABLE AS runs once") {
+    hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect()
+    assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1,
+      "Incorrect number of rows in created table")
+  }
+
   createQueryTest("between",
     "SELECT * FROM src WHERE key Between 1 and 2")
 

Reply via email to