Repository: spark
Updated Branches:
  refs/heads/master 17edfec59 -> 8be7e6bb3


[SPARK-21973][SQL] Add an new option to filter queries in TPC-DS

## What changes were proposed in this pull request?
This pr added a new option to filter TPC-DS queries to run in 
`TPCDSQueryBenchmark`.
By default, `TPCDSQueryBenchmark` runs all the TPC-DS queries.
This change could enable developers to run some of the TPC-DS queries by this 
option,
e.g., to run q2, q4, and q6 only:
```
spark-submit --class <this class> --conf spark.sql.tpcds.queryFilter="q2,q4,q6" 
--jars <spark sql test jar>
```

## How was this patch tested?
Manually checked.

Author: Takeshi Yamamuro <yamam...@apache.org>

Closes #19188 from maropu/RunPartialQueriesInTPCDS.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8be7e6bb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8be7e6bb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8be7e6bb

Branch: refs/heads/master
Commit: 8be7e6bb3cc8afd07c24e7dbf0f8fbe0f491d740
Parents: 17edfec
Author: Takeshi Yamamuro <yamam...@apache.org>
Authored: Wed Sep 13 21:54:10 2017 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Wed Sep 13 21:54:10 2017 -0700

----------------------------------------------------------------------
 .../benchmark/TPCDSQueryBenchmark.scala         | 23 +++++++++++++++++---
 .../TPCDSQueryBenchmarkArguments.scala          | 17 +++++++++++++--
 2 files changed, 35 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8be7e6bb/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
index 63d118c..99c6df7 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.execution.benchmark
 
 import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
@@ -29,9 +30,9 @@ import org.apache.spark.util.Benchmark
 /**
  * Benchmark to measure TPCDS query performance.
  * To run this:
- *  spark-submit --class <this class> <spark sql test jar> <TPCDS data 
location>
+ *  spark-submit --class <this class> <spark sql test jar> --data-location 
<TPCDS data location>
  */
-object TPCDSQueryBenchmark {
+object TPCDSQueryBenchmark extends Logging {
   val conf =
     new SparkConf()
       .setMaster("local[1]")
@@ -90,7 +91,9 @@ object TPCDSQueryBenchmark {
       benchmark.addCase(name) { i =>
         spark.sql(queryString).collect()
       }
+      logInfo(s"\n\n===== TPCDS QUERY BENCHMARK OUTPUT FOR $name =====\n")
       benchmark.run()
+      logInfo(s"\n\n===== FINISHED $name =====\n")
     }
   }
 
@@ -110,6 +113,20 @@ object TPCDSQueryBenchmark {
       "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
       "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
 
-    tpcdsAll(benchmarkArgs.dataLocation, queries = tpcdsQueries)
+    // If `--query-filter` defined, filters the queries that this option 
selects
+    val queriesToRun = if (benchmarkArgs.queryFilter.nonEmpty) {
+      val queries = tpcdsQueries.filter { case queryName =>
+        benchmarkArgs.queryFilter.contains(queryName)
+      }
+      if (queries.isEmpty) {
+        throw new RuntimeException(
+          s"Empty queries to run. Bad query name filter: 
${benchmarkArgs.queryFilter}")
+      }
+      queries
+    } else {
+      tpcdsQueries
+    }
+
+    tpcdsAll(benchmarkArgs.dataLocation, queries = queriesToRun)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/8be7e6bb/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala
index 8edc77b..184ffff 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala
@@ -17,21 +17,33 @@
 
 package org.apache.spark.sql.execution.benchmark
 
+import java.util.Locale
+
+
 class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
   var dataLocation: String = null
+  var queryFilter: Set[String] = Set.empty
 
   parseArgs(args.toList)
   validateArguments()
 
+  private def optionMatch(optionName: String, s: String): Boolean = {
+    optionName == s.toLowerCase(Locale.ROOT)
+  }
+
   private def parseArgs(inputArgs: List[String]): Unit = {
     var args = inputArgs
 
-    while(args.nonEmpty) {
+    while (args.nonEmpty) {
       args match {
-        case ("--data-location") :: value :: tail =>
+        case optName :: value :: tail if optionMatch("--data-location", 
optName) =>
           dataLocation = value
           args = tail
 
+        case optName :: value :: tail if optionMatch("--query-filter", 
optName) =>
+          queryFilter = 
value.toLowerCase(Locale.ROOT).split(",").map(_.trim).toSet
+          args = tail
+
         case _ =>
           // scalastyle:off println
           System.err.println("Unknown/unsupported param " + args)
@@ -47,6 +59,7 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
       |Usage: spark-submit --class <this class> <spark sql test jar> [Options]
       |Options:
       |  --data-location      Path to TPCDS data
+      |  --query-filter       Queries to filter, e.g., q3,q5,q13
       |
       
|------------------------------------------------------------------------------------------------------------------
       |In order to run this benchmark, please follow the instructions at


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to