Repository: spark Updated Branches: refs/heads/master e58f27567 -> 9fafa8209
[SPARK-22800][TEST][SQL] Add a SSB query suite ## What changes were proposed in this pull request? Add a test suite to ensure all the [SSB (Star Schema Benchmark)](https://www.cs.umb.edu/~poneil/StarSchemaB.PDF) queries can be successfully analyzed, optimized and compiled without hitting the max iteration threshold. ## How was this patch tested? Added `SSBQuerySuite`. Author: Takeshi Yamamuro <yamam...@apache.org> Closes #19990 from maropu/SPARK-22800. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9fafa820 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9fafa820 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9fafa820 Branch: refs/heads/master Commit: 9fafa8209c51adc2a22b89aedf9af7b5e29e0059 Parents: e58f275 Author: Takeshi Yamamuro <yamam...@apache.org> Authored: Fri Dec 15 09:56:22 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Fri Dec 15 09:56:22 2017 -0800 ---------------------------------------------------------------------- sql/core/src/test/resources/ssb/1.1.sql | 6 ++ sql/core/src/test/resources/ssb/1.2.sql | 6 ++ sql/core/src/test/resources/ssb/1.3.sql | 6 ++ sql/core/src/test/resources/ssb/2.1.sql | 9 ++ sql/core/src/test/resources/ssb/2.2.sql | 9 ++ sql/core/src/test/resources/ssb/2.3.sql | 9 ++ sql/core/src/test/resources/ssb/3.1.sql | 10 +++ sql/core/src/test/resources/ssb/3.2.sql | 10 +++ sql/core/src/test/resources/ssb/3.3.sql | 12 +++ sql/core/src/test/resources/ssb/3.4.sql | 12 +++ sql/core/src/test/resources/ssb/4.1.sql | 11 +++ sql/core/src/test/resources/ssb/4.2.sql | 12 +++ sql/core/src/test/resources/ssb/4.3.sql | 12 +++ .../org/apache/spark/sql/SSBQuerySuite.scala | 87 ++++++++++++++++++++ .../org/apache/spark/sql/TPCHQuerySuite.scala | 2 - 15 files changed, 211 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/1.1.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/1.1.sql b/sql/core/src/test/resources/ssb/1.1.sql new file mode 100644 index 0000000..62da302 --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.1.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_year = 1993 + and lo_discount between 1 and 3 + and lo_quantity < 25 http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/1.2.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/1.2.sql b/sql/core/src/test/resources/ssb/1.2.sql new file mode 100644 index 0000000..1657bfd --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.2.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_yearmonthnum = 199401 + and lo_discount between 4 and 6 + and lo_quantity between 26 and 35 http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/1.3.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/1.3.sql b/sql/core/src/test/resources/ssb/1.3.sql new file mode 100644 index 0000000..e9bbf51 --- /dev/null +++ b/sql/core/src/test/resources/ssb/1.3.sql @@ -0,0 +1,6 @@ +select sum(lo_extendedprice*lo_discount) as revenue + from lineorder, date + where lo_orderdate = d_datekey + and d_weeknuminyear = 6 and d_year = 1994 + and lo_discount between 5 and 7 + and lo_quantity between 36 and 40 http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/2.1.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/2.1.sql b/sql/core/src/test/resources/ssb/2.1.sql new file mode 100644 index 0000000..00d4027 --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.1.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_category = 'MFGR#12' + and s_region = 'AMERICA' + group by d_year, p_brand1 + order by d_year, p_brand1 http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/2.2.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/2.2.sql b/sql/core/src/test/resources/ssb/2.2.sql new file mode 100644 index 0000000..c53a925 --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.2.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 between 'MFGR#2221' and 'MFGR#2228' + and s_region = 'ASIA' + group by d_year, p_brand1 + order by d_year, p_brand1 http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/2.3.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/2.3.sql b/sql/core/src/test/resources/ssb/2.3.sql new file mode 100644 index 0000000..e36530c --- /dev/null +++ b/sql/core/src/test/resources/ssb/2.3.sql @@ -0,0 +1,9 @@ +select sum(lo_revenue), d_year, p_brand1 + from lineorder, date, part, supplier + where lo_orderdate = d_datekey + and lo_partkey = p_partkey + and lo_suppkey = s_suppkey + and p_brand1 = 'MFGR#2221' + and s_region = 'EUROPE' + group by d_year, p_brand1 + order by d_year, p_brand1 http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.1.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/3.1.sql b/sql/core/src/test/resources/ssb/3.1.sql new file mode 100644 index 0000000..663ec3f --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.1.sql @@ -0,0 +1,10 @@ +select c_nation, s_nation, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_region = 'ASIA' + and s_region = 'ASIA' + and d_year >= 1992 and d_year <= 1997 + group by c_nation, s_nation, d_year + order by d_year asc, revenue desc http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.2.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/3.2.sql b/sql/core/src/test/resources/ssb/3.2.sql new file mode 100644 index 0000000..e1eaf10 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.2.sql @@ -0,0 +1,10 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED STATES' + and s_nation = 'UNITED STATES' + and d_year >= 1992 and d_year <= 1997 + group by c_city, s_city, d_year + order by d_year asc, revenue desc http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.3.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/3.3.sql b/sql/core/src/test/resources/ssb/3.3.sql new file mode 100644 index 0000000..f2cb44f --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.3.sql @@ -0,0 +1,12 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED KINGDOM' + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and s_nation = 'UNITED KINGDOM' + and d_year >= 1992 and d_year <= 1997 + group by c_city, s_city, d_year + order by d_year asc, revenue desc http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.4.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/3.4.sql b/sql/core/src/test/resources/ssb/3.4.sql new file mode 100644 index 0000000..936f246 --- /dev/null +++ b/sql/core/src/test/resources/ssb/3.4.sql @@ -0,0 +1,12 @@ +select c_city, s_city, d_year, sum(lo_revenue) as revenue + from customer, lineorder, supplier, date + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_orderdate = d_datekey + and c_nation = 'UNITED KINGDOM' + and (c_city='UNITED KI1' or c_city='UNITED KI5') + and (s_city='UNITED KI1' or s_city='UNITED KI5') + and s_nation = 'UNITED KINGDOM' + and d_yearmonth = 'Dec1997' + group by c_city, s_city, d_year + order by d_year asc, revenue desc http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/4.1.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/4.1.sql b/sql/core/src/test/resources/ssb/4.1.sql new file mode 100644 index 0000000..af19ecd --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.1.sql @@ -0,0 +1,11 @@ +select d_year, c_nation, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') + group by d_year, c_nation + order by d_year, c_nation http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/4.2.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/4.2.sql b/sql/core/src/test/resources/ssb/4.2.sql new file mode 100644 index 0000000..2ffe6e2 --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.2.sql @@ -0,0 +1,12 @@ +select d_year, s_nation, p_category, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_region = 'AMERICA' + and (d_year = 1997 or d_year = 1998) + and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2') + group by d_year, s_nation, p_category + order by d_year, s_nation, p_category http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/4.3.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/ssb/4.3.sql b/sql/core/src/test/resources/ssb/4.3.sql new file mode 100644 index 0000000..e24e2cc --- /dev/null +++ b/sql/core/src/test/resources/ssb/4.3.sql @@ -0,0 +1,12 @@ +select d_year, s_city, p_brand1, sum(lo_revenue-lo_supplycost) as profit1 + from date, customer, supplier, part, lineorder + where lo_custkey = c_custkey + and lo_suppkey = s_suppkey + and lo_partkey = p_partkey + and lo_orderdate = d_datekey + and c_region = 'AMERICA' + and s_nation = 'UNITED STATES' + and (d_year = 1997 or d_year = 1998) + and p_category = 'MFGR#14' + group by d_year, s_city, p_brand1 + order by d_year, s_city, p_brand1 http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala new file mode 100644 index 0000000..9a0c61b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.util.resourceToString + +/** + * This test suite ensures all the Star Schema Benchmark queries can be successfully analyzed, + * optimized and compiled without hitting the max iteration threshold. + */ +class SSBQuerySuite extends BenchmarkQueryTest { + + override def beforeAll { + super.beforeAll + + sql( + """ + |CREATE TABLE `part` (`p_partkey` INT, `p_name` STRING, `p_mfgr` STRING, + |`p_category` STRING, `p_brand1` STRING, `p_color` STRING, `p_type` STRING, `p_size` INT, + |`p_container` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `supplier` (`s_suppkey` INT, `s_name` STRING, `s_address` STRING, + |`s_city` STRING, `s_nation` STRING, `s_region` STRING, `s_phone` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `customer` (`c_custkey` INT, `c_name` STRING, `c_address` STRING, + |`c_city` STRING, `c_nation` STRING, `c_region` STRING, `c_phone` STRING, + |`c_mktsegment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `date` (`d_datekey` INT, `d_date` STRING, `d_dayofweek` STRING, + |`d_month` STRING, `d_year` INT, `d_yearmonthnum` INT, `d_yearmonth` STRING, + |`d_daynuminweek` INT, `d_daynuminmonth` INT, `d_daynuminyear` INT, `d_monthnuminyear` INT, + |`d_weeknuminyear` INT, `d_sellingseason` STRING, `d_lastdayinweekfl` STRING, + |`d_lastdayinmonthfl` STRING, `d_holidayfl` STRING, `d_weekdayfl` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `lineorder` (`lo_orderkey` INT, `lo_linenumber` INT, `lo_custkey` INT, + |`lo_partkey` INT, `lo_suppkey` INT, `lo_orderdate` INT, `lo_orderpriority` STRING, + |`lo_shippriority` STRING, `lo_quantity` INT, `lo_extendedprice` INT, + |`lo_ordertotalprice` INT, `lo_discount` INT, `lo_revenue` INT, `lo_supplycost` INT, + |`lo_tax` INT, `lo_commitdate` INT, `lo_shipmode` STRING) + |USING parquet + """.stripMargin) + } + + val ssbQueries = Seq( + "1.1", "1.2", "1.3", "2.1", "2.2", "2.3", "3.1", "3.2", "3.3", "3.4", "4.1", "4.2", "4.3") + + ssbQueries.foreach { name => + val queryString = resourceToString(s"ssb/$name.sql", + classLoader = Thread.currentThread.getContextClassLoader) + test(name) { + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) + } + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala index 69ac92e..e3e7005 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala @@ -17,9 +17,7 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.resourceToString -import org.apache.spark.util.Utils /** * This test suite ensures all the TPC-H queries can be successfully analyzed, optimized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org