Repository: spark Updated Branches: refs/heads/branch-2.0 1230516d9 -> 7d8cddfb4
[SPARK-15998][SQL] Verification of SQLConf HIVE_METASTORE_PARTITION_PRUNING #### What changes were proposed in this pull request? `HIVE_METASTORE_PARTITION_PRUNING` is a public `SQLConf`. When `true`, some predicates will be pushed down into the Hive metastore so that unmatching partitions can be eliminated earlier. The current default value is `false`. For performance improvement, users might turn this parameter on. So far, the code base does not have such a test case to verify whether this `SQLConf` properly works. This PR is to improve the test case coverage for avoiding future regression. #### How was this patch tested? N/A Author: gatorsmile <gatorsm...@gmail.com> Closes #13716 from gatorsmile/addTestMetastorePartitionPruning. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d8cddfb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d8cddfb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d8cddfb Branch: refs/heads/branch-2.0 Commit: 7d8cddfb495d406b9f2fb5216edd14dea442ec73 Parents: 1230516 Author: gatorsmile <gatorsm...@gmail.com> Authored: Thu Jun 16 14:23:17 2016 -0700 Committer: Andrew Or <and...@databricks.com> Committed: Thu Jun 16 14:26:46 2016 -0700 ---------------------------------------------------------------------- .../sql/hive/execution/HiveTableScanSuite.scala | 60 +++++++++++++++++++- 1 file changed, 57 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7d8cddfb/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 60f8be5..76d3f3d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.Row -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.Utils -class HiveTableScanSuite extends HiveComparisonTest { +class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestHiveSingleton { createQueryTest("partition_based_table_scan_with_different_serde", """ @@ -89,4 +90,57 @@ class HiveTableScanSuite extends HiveComparisonTest { assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi")) assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi")) } + + private def checkNumScannedPartitions(stmt: String, expectedNumParts: Int): Unit = { + val plan = sql(stmt).queryExecution.sparkPlan + val numPartitions = plan.collectFirst { + case p: HiveTableScanExec => + p.relation.getHiveQlPartitions(p.partitionPruningPred).length + }.getOrElse(0) + assert(numPartitions == expectedNumParts) + } + + test("Verify SQLConf HIVE_METASTORE_PARTITION_PRUNING") { + val view = "src" + withTempTable(view) { + spark.range(1, 5).createOrReplaceTempView(view) + val table = "table_with_partition" + withTable(table) { + sql( + s""" + |CREATE TABLE $table(id string) + |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) + """.stripMargin) + sql( + s""" + |FROM $view v + |INSERT INTO TABLE $table + |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') + |SELECT v.id + |INSERT INTO TABLE $table + |PARTITION (p1='a',p2='c',p3='c',p4='d',p5='e') + |SELECT v.id + """.stripMargin) + + Seq("true", "false").foreach { hivePruning => + withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> hivePruning) { + // If the pruning predicate is used, getHiveQlPartitions should only return the + // qualified partition; Otherwise, it return all the partitions. + val expectedNumPartitions = if (hivePruning == "true") 1 else 2 + checkNumScannedPartitions( + stmt = s"SELECT id, p2 FROM $table WHERE p2 <= 'b'", expectedNumPartitions) + } + } + + Seq("true", "false").foreach { hivePruning => + withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> hivePruning) { + // If the pruning predicate does not exist, getHiveQlPartitions should always + // return all the partitions. + checkNumScannedPartitions( + stmt = s"SELECT id, p2 FROM $table WHERE id <= 3", expectedNumParts = 2) + } + } + } + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org