Repository: spark
Updated Branches:
  refs/heads/branch-2.0 1230516d9 -> 7d8cddfb4


[SPARK-15998][SQL] Verification of SQLConf HIVE_METASTORE_PARTITION_PRUNING

#### What changes were proposed in this pull request?
`HIVE_METASTORE_PARTITION_PRUNING` is a public `SQLConf`. When `true`, some 
predicates will be pushed down into the Hive metastore so that non-matching 
partitions can be eliminated earlier. The current default value is `false`. For 
performance improvement, users might turn this parameter on.

So far, the code base does not have such a test case to verify whether this 
`SQLConf` properly works. This PR is to improve the test case coverage to 
avoid future regressions.

#### How was this patch tested?
N/A

Author: gatorsmile <gatorsm...@gmail.com>

Closes #13716 from gatorsmile/addTestMetastorePartitionPruning.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d8cddfb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d8cddfb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d8cddfb

Branch: refs/heads/branch-2.0
Commit: 7d8cddfb495d406b9f2fb5216edd14dea442ec73
Parents: 1230516
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Thu Jun 16 14:23:17 2016 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Thu Jun 16 14:26:46 2016 -0700

----------------------------------------------------------------------
 .../sql/hive/execution/HiveTableScanSuite.scala | 60 +++++++++++++++++++-
 1 file changed, 57 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7d8cddfb/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 60f8be5..76d3f3d 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,13 +18,14 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.util.Utils
 
-class HiveTableScanSuite extends HiveComparisonTest {
+class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with 
TestHiveSingleton {
 
   createQueryTest("partition_based_table_scan_with_different_serde",
     """
@@ -89,4 +90,57 @@ class HiveTableScanSuite extends HiveComparisonTest {
     assert(sql("select CaseSensitiveColName from spark_4959_2").head() === 
Row("hi"))
     assert(sql("select casesensitivecolname from spark_4959_2").head() === 
Row("hi"))
   }
+
+  private def checkNumScannedPartitions(stmt: String, expectedNumParts: Int): 
Unit = {
+    val plan = sql(stmt).queryExecution.sparkPlan
+    val numPartitions = plan.collectFirst {
+      case p: HiveTableScanExec =>
+        p.relation.getHiveQlPartitions(p.partitionPruningPred).length
+    }.getOrElse(0)
+    assert(numPartitions == expectedNumParts)
+  }
+
+  test("Verify SQLConf HIVE_METASTORE_PARTITION_PRUNING") {
+    val view = "src"
+    withTempTable(view) {
+      spark.range(1, 5).createOrReplaceTempView(view)
+      val table = "table_with_partition"
+      withTable(table) {
+        sql(
+          s"""
+             |CREATE TABLE $table(id string)
+             |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 
string)
+           """.stripMargin)
+        sql(
+          s"""
+             |FROM $view v
+             |INSERT INTO TABLE $table
+             |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
+             |SELECT v.id
+             |INSERT INTO TABLE $table
+             |PARTITION (p1='a',p2='c',p3='c',p4='d',p5='e')
+             |SELECT v.id
+           """.stripMargin)
+
+        Seq("true", "false").foreach { hivePruning =>
+          withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> 
hivePruning) {
+            // If the pruning predicate is used, getHiveQlPartitions should 
only return the
+            // qualified partition; Otherwise, it return all the partitions.
+            val expectedNumPartitions = if (hivePruning == "true") 1 else 2
+            checkNumScannedPartitions(
+              stmt = s"SELECT id, p2 FROM $table WHERE p2 <= 'b'", 
expectedNumPartitions)
+          }
+        }
+
+        Seq("true", "false").foreach { hivePruning =>
+          withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> 
hivePruning) {
+            // If the pruning predicate does not exist, getHiveQlPartitions 
should always
+            // return all the partitions.
+            checkNumScannedPartitions(
+              stmt = s"SELECT id, p2 FROM $table WHERE id <= 3", 
expectedNumParts = 2)
+          }
+        }
+      }
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to