Repository: spark Updated Branches: refs/heads/master 0ea2d8c12 -> 3fea5c4f1
[SPARK-22787][TEST][SQL] Add a TPC-H query suite ## What changes were proposed in this pull request? Add a test suite to ensure all the TPC-H queries can be successfully analyzed, optimized and compiled without hitting the max iteration threshold. ## How was this patch tested? N/A Author: gatorsmile <gatorsm...@gmail.com> Closes #19982 from gatorsmile/testTPCH. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fea5c4f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fea5c4f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fea5c4f Branch: refs/heads/master Commit: 3fea5c4f19cb5369ff8bbeca80768a8aadb463f5 Parents: 0ea2d8c Author: gatorsmile <gatorsm...@gmail.com> Authored: Thu Dec 14 22:56:57 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Thu Dec 14 22:56:57 2017 -0800 ---------------------------------------------------------------------- .../datasources/text/TextFileFormat.scala | 7 +- sql/core/src/test/resources/tpch/q1.sql | 23 ++++ sql/core/src/test/resources/tpch/q10.sql | 34 ++++++ sql/core/src/test/resources/tpch/q11.sql | 29 +++++ sql/core/src/test/resources/tpch/q12.sql | 30 +++++ sql/core/src/test/resources/tpch/q13.sql | 22 ++++ sql/core/src/test/resources/tpch/q14.sql | 15 +++ sql/core/src/test/resources/tpch/q15.sql | 35 ++++++ sql/core/src/test/resources/tpch/q16.sql | 32 ++++++ sql/core/src/test/resources/tpch/q17.sql | 19 ++++ sql/core/src/test/resources/tpch/q18.sql | 35 ++++++ sql/core/src/test/resources/tpch/q19.sql | 37 +++++++ sql/core/src/test/resources/tpch/q2.sql | 46 ++++++++ sql/core/src/test/resources/tpch/q20.sql | 39 +++++++ sql/core/src/test/resources/tpch/q21.sql | 42 +++++++ sql/core/src/test/resources/tpch/q22.sql | 39 +++++++ sql/core/src/test/resources/tpch/q3.sql | 25 +++++ sql/core/src/test/resources/tpch/q4.sql | 23 ++++ sql/core/src/test/resources/tpch/q5.sql | 26 +++++ sql/core/src/test/resources/tpch/q6.sql | 11 ++ sql/core/src/test/resources/tpch/q7.sql | 41 +++++++ sql/core/src/test/resources/tpch/q8.sql | 39 +++++++ sql/core/src/test/resources/tpch/q9.sql | 34 ++++++ .../apache/spark/sql/BenchmarkQueryTest.scala | 78 +++++++++++++ .../org/apache/spark/sql/TPCDSQuerySuite.scala | 58 +--------- .../org/apache/spark/sql/TPCHQuerySuite.scala | 110 +++++++++++++++++++ 26 files changed, 872 insertions(+), 57 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala index 8a6ab30..c661e9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala @@ -116,9 +116,10 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister { readToUnsafeMem(broadcastedHadoopConf, requiredSchema, textOptions.wholeText) } - private def readToUnsafeMem(conf: Broadcast[SerializableConfiguration], - requiredSchema: StructType, wholeTextMode: Boolean): - (PartitionedFile) => Iterator[UnsafeRow] = { + private def readToUnsafeMem( + conf: Broadcast[SerializableConfiguration], + requiredSchema: StructType, + wholeTextMode: Boolean): (PartitionedFile) => Iterator[UnsafeRow] = { (file: PartitionedFile) => { val confValue = conf.value.value http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q1.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q1.sql b/sql/core/src/test/resources/tpch/q1.sql new file mode 100644 index 0000000..73eb8d8 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q1.sql @@ -0,0 +1,23 @@ +-- using default substitutions + +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '90' day +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q10.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q10.sql b/sql/core/src/test/resources/tpch/q10.sql new file mode 100644 index 0000000..3b2ae58 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q10.sql @@ -0,0 +1,34 @@ +-- using default substitutions + +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-10-01' + and o_orderdate < date '1993-10-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20 http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q11.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q11.sql b/sql/core/src/test/resources/tpch/q11.sql new file mode 100644 index 0000000..531e78c --- /dev/null +++ b/sql/core/src/test/resources/tpch/q11.sql @@ -0,0 +1,29 @@ +-- using default substitutions + +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'GERMANY' + ) +order by + value desc http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q12.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q12.sql b/sql/core/src/test/resources/tpch/q12.sql new file mode 100644 index 0000000..d3e70eb --- /dev/null +++ b/sql/core/src/test/resources/tpch/q12.sql @@ -0,0 +1,30 @@ +-- using default substitutions + +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('MAIL', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1994-01-01' + and l_receiptdate < date '1994-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q13.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q13.sql b/sql/core/src/test/resources/tpch/q13.sql new file mode 100644 index 0000000..3375002 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q13.sql @@ -0,0 +1,22 @@ +-- using default substitutions + +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) as c_count + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%special%requests%' + group by + c_custkey + ) as c_orders +group by + c_count +order by + custdist desc, + c_count desc http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q14.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q14.sql b/sql/core/src/test/resources/tpch/q14.sql new file mode 100644 index 0000000..753ea56 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q14.sql @@ -0,0 +1,15 @@ +-- using default substitutions + +select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1995-09-01' + and l_shipdate < date '1995-09-01' + interval '1' month http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q15.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q15.sql b/sql/core/src/test/resources/tpch/q15.sql new file mode 100644 index 0000000..64d0b48 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q15.sql @@ -0,0 +1,35 @@ +-- using default substitutions + +with revenue0 as + (select + l_suppkey as supplier_no, + sum(l_extendedprice * (1 - l_discount)) as total_revenue + from + lineitem + where + l_shipdate >= date '1996-01-01' + and l_shipdate < date '1996-01-01' + interval '3' month + group by + l_suppkey) + + +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey + http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q16.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q16.sql b/sql/core/src/test/resources/tpch/q16.sql new file mode 100644 index 0000000..a6ac688 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q16.sql @@ -0,0 +1,32 @@ +-- using default substitutions + +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'MEDIUM POLISHED%' + and p_size in (49, 14, 23, 45, 19, 3, 36, 9) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q17.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q17.sql b/sql/core/src/test/resources/tpch/q17.sql new file mode 100644 index 0000000..74fb1f6 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q17.sql @@ -0,0 +1,19 @@ +-- using default substitutions + +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container = 'MED BOX' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ) http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q18.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q18.sql b/sql/core/src/test/resources/tpch/q18.sql new file mode 100644 index 0000000..210fba1 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q18.sql @@ -0,0 +1,35 @@ +-- using default substitutions + +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 300 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q19.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q19.sql b/sql/core/src/test/resources/tpch/q19.sql new file mode 100644 index 0000000..c07327d --- /dev/null +++ b/sql/core/src/test/resources/tpch/q19.sql @@ -0,0 +1,37 @@ +-- using default substitutions + +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 1 and l_quantity <= 1 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#34' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q2.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q2.sql b/sql/core/src/test/resources/tpch/q2.sql new file mode 100644 index 0000000..d0e3b7e --- /dev/null +++ b/sql/core/src/test/resources/tpch/q2.sql @@ -0,0 +1,46 @@ +-- using default substitutions + +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 15 + and p_type like '%BRASS' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'EUROPE' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +limit 100 http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q20.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q20.sql b/sql/core/src/test/resources/tpch/q20.sql new file mode 100644 index 0000000..e161d34 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q20.sql @@ -0,0 +1,39 @@ +-- using default substitutions + +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'forest%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'CANADA' +order by + s_name http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q21.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q21.sql b/sql/core/src/test/resources/tpch/q21.sql new file mode 100644 index 0000000..fdcdfbc --- /dev/null +++ b/sql/core/src/test/resources/tpch/q21.sql @@ -0,0 +1,42 @@ +-- using default substitutions + +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'SAUDI ARABIA' +group by + s_name +order by + numwait desc, + s_name +limit 100 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q22.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q22.sql b/sql/core/src/test/resources/tpch/q22.sql new file mode 100644 index 0000000..1d7706e --- /dev/null +++ b/sql/core/src/test/resources/tpch/q22.sql @@ -0,0 +1,39 @@ +-- using default substitutions + +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone, 1, 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone, 1, 2) in + ('13', '31', '23', '29', '30', '18', '17') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone, 1, 2) in + ('13', '31', '23', '29', '30', '18', '17') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q3.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q3.sql b/sql/core/src/test/resources/tpch/q3.sql new file mode 100644 index 0000000..948d6bc --- /dev/null +++ b/sql/core/src/test/resources/tpch/q3.sql @@ -0,0 +1,25 @@ +-- using default substitutions + +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 10 http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q4.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q4.sql b/sql/core/src/test/resources/tpch/q4.sql new file mode 100644 index 0000000..67330e3 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q4.sql @@ -0,0 +1,23 @@ +-- using default substitutions + +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q5.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q5.sql b/sql/core/src/test/resources/tpch/q5.sql new file mode 100644 index 0000000..b973e9f --- /dev/null +++ b/sql/core/src/test/resources/tpch/q5.sql @@ -0,0 +1,26 @@ +-- using default substitutions + +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name +order by + revenue desc http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q6.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q6.sql b/sql/core/src/test/resources/tpch/q6.sql new file mode 100644 index 0000000..2229457 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q6.sql @@ -0,0 +1,11 @@ +-- using default substitutions + +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between .06 - 0.01 and .06 + 0.01 + and l_quantity < 24 http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q7.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q7.sql b/sql/core/src/test/resources/tpch/q7.sql new file mode 100644 index 0000000..21105c0 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q7.sql @@ -0,0 +1,41 @@ +-- using default substitutions + +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + year(l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') + or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q8.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q8.sql b/sql/core/src/test/resources/tpch/q8.sql new file mode 100644 index 0000000..81d8187 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q8.sql @@ -0,0 +1,39 @@ +-- using default substitutions + +select + o_year, + sum(case + when nation = 'BRAZIL' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + year(o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'AMERICA' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'ECONOMY ANODIZED STEEL' + ) as all_nations +group by + o_year +order by + o_year http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q9.sql ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/tpch/q9.sql b/sql/core/src/test/resources/tpch/q9.sql new file mode 100644 index 0000000..a4e8e83 --- /dev/null +++ b/sql/core/src/test/resources/tpch/q9.sql @@ -0,0 +1,34 @@ +-- using default substitutions + +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + year(o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%green%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala new file mode 100644 index 0000000..7037749 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec} +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.util.Utils + +abstract class BenchmarkQueryTest extends QueryTest with SharedSQLContext with BeforeAndAfterAll { + + // When Utils.isTesting is true, the RuleExecutor will issue an exception when hitting + // the max iteration of analyzer/optimizer batches. + assert(Utils.isTesting, "spark.testing is not set to true") + + /** + * Drop all the tables + */ + protected override def afterAll(): Unit = { + try { + // For debugging dump some statistics about how much time was spent in various optimizer rules + logWarning(RuleExecutor.dumpTimeSpent()) + spark.sessionState.catalog.reset() + } finally { + super.afterAll() + } + } + + override def beforeAll() { + super.beforeAll() + RuleExecutor.resetTime() + } + + protected def checkGeneratedCode(plan: SparkPlan): Unit = { + val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() + plan foreach { + case s: WholeStageCodegenExec => + codegenSubtrees += s + case s => s + } + codegenSubtrees.toSeq.foreach { subtree => + val code = subtree.doCodeGen()._2 + try { + // Just check the generated code can be properly compiled + CodeGenerator.compile(code) + } catch { + case e: Exception => + val msg = + s""" + |failed to compile: + |Subtree: + |$subtree + |Generated code: + |${CodeFormatter.format(code)} + """.stripMargin + throw new Exception(msg, e) + } + } + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index dd427a5..1a58418 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -17,41 +17,18 @@ package org.apache.spark.sql -import org.scalatest.BeforeAndAfterAll - -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator} -import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.resourceToString -import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.util.Utils /** - * This test suite ensures all the TPC-DS queries can be successfully analyzed and optimized - * without hitting the max iteration threshold. + * This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized + * and compiled without hitting the max iteration threshold. */ -class TPCDSQuerySuite extends QueryTest with SharedSQLContext with BeforeAndAfterAll { - - // When Utils.isTesting is true, the RuleExecutor will issue an exception when hitting - // the max iteration of analyzer/optimizer batches. - assert(Utils.isTesting, "spark.testing is not set to true") - - /** - * Drop all the tables - */ - protected override def afterAll(): Unit = { - try { - // For debugging dump some statistics about how much time was spent in various optimizer rules - logWarning(RuleExecutor.dumpTimeSpent()) - spark.sessionState.catalog.reset() - } finally { - super.afterAll() - } - } +class TPCDSQuerySuite extends BenchmarkQueryTest { override def beforeAll() { super.beforeAll() + sql( """ |CREATE TABLE `catalog_page` ( @@ -350,33 +327,6 @@ class TPCDSQuerySuite extends QueryTest with SharedSQLContext with BeforeAndAfte "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") - private def checkGeneratedCode(plan: SparkPlan): Unit = { - val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() - plan foreach { - case s: WholeStageCodegenExec => - codegenSubtrees += s - case s => s - } - codegenSubtrees.toSeq.foreach { subtree => - val code = subtree.doCodeGen()._2 - try { - // Just check the generated code can be properly compiled - CodeGenerator.compile(code) - } catch { - case e: Exception => - val msg = - s""" - |failed to compile: - |Subtree: - |$subtree - |Generated code: - |${CodeFormatter.format(code)} - """.stripMargin - throw new Exception(msg, e) - } - } - } - tpcdsQueries.foreach { name => val queryString = resourceToString(s"tpcds/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala new file mode 100644 index 0000000..69ac92e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.catalyst.util.resourceToString +import org.apache.spark.util.Utils + +/** + * This test suite ensures all the TPC-H queries can be successfully analyzed, optimized + * and compiled without hitting the max iteration threshold. + */ +class TPCHQuerySuite extends BenchmarkQueryTest { + + override def beforeAll() { + super.beforeAll() + + sql( + """ + |CREATE TABLE `orders` ( + |`o_orderkey` BIGINT, `o_custkey` BIGINT, `o_orderstatus` STRING, + |`o_totalprice` DECIMAL(10,0), `o_orderdate` DATE, `o_orderpriority` STRING, + |`o_clerk` STRING, `o_shippriority` INT, `o_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `nation` ( + |`n_nationkey` BIGINT, `n_name` STRING, `n_regionkey` BIGINT, `n_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `region` ( + |`r_regionkey` BIGINT, `r_name` STRING, `r_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `part` (`p_partkey` BIGINT, `p_name` STRING, `p_mfgr` STRING, + |`p_brand` STRING, `p_type` STRING, `p_size` INT, `p_container` STRING, + |`p_retailprice` DECIMAL(10,0), `p_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `partsupp` (`ps_partkey` BIGINT, `ps_suppkey` BIGINT, + |`ps_availqty` INT, `ps_supplycost` DECIMAL(10,0), `ps_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `customer` (`c_custkey` BIGINT, `c_name` STRING, `c_address` STRING, + |`c_nationkey` STRING, `c_phone` STRING, `c_acctbal` DECIMAL(10,0), + |`c_mktsegment` STRING, `c_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `supplier` (`s_suppkey` BIGINT, `s_name` STRING, `s_address` STRING, + |`s_nationkey` BIGINT, `s_phone` STRING, `s_acctbal` DECIMAL(10,0), `s_comment` STRING) + |USING parquet + """.stripMargin) + + sql( + """ + |CREATE TABLE `lineitem` (`l_orderkey` BIGINT, `l_partkey` BIGINT, `l_suppkey` BIGINT, + |`l_linenumber` INT, `l_quantity` DECIMAL(10,0), `l_extendedprice` DECIMAL(10,0), + |`l_discount` DECIMAL(10,0), `l_tax` DECIMAL(10,0), `l_returnflag` STRING, + |`l_linestatus` STRING, `l_shipdate` DATE, `l_commitdate` DATE, `l_receiptdate` DATE, + |`l_shipinstruct` STRING, `l_shipmode` STRING, `l_comment` STRING) + |USING parquet + """.stripMargin) + } + + val tpchQueries = Seq( + "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22") + + tpchQueries.foreach { name => + val queryString = resourceToString(s"tpch/$name.sql", + classLoader = Thread.currentThread().getContextClassLoader) + test(name) { + // check the plans can be properly generated + val plan = sql(queryString).queryExecution.executedPlan + checkGeneratedCode(plan) + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org