Repository: spark
Updated Branches:
  refs/heads/master 0ea2d8c12 -> 3fea5c4f1


[SPARK-22787][TEST][SQL] Add a TPC-H query suite

## What changes were proposed in this pull request?
Add a test suite to ensure all the TPC-H queries can be successfully analyzed, 
optimized and compiled without hitting the max iteration threshold.

## How was this patch tested?
N/A

Author: gatorsmile <gatorsm...@gmail.com>

Closes #19982 from gatorsmile/testTPCH.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fea5c4f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fea5c4f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fea5c4f

Branch: refs/heads/master
Commit: 3fea5c4f19cb5369ff8bbeca80768a8aadb463f5
Parents: 0ea2d8c
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Thu Dec 14 22:56:57 2017 -0800
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Thu Dec 14 22:56:57 2017 -0800

----------------------------------------------------------------------
 .../datasources/text/TextFileFormat.scala       |   7 +-
 sql/core/src/test/resources/tpch/q1.sql         |  23 ++++
 sql/core/src/test/resources/tpch/q10.sql        |  34 ++++++
 sql/core/src/test/resources/tpch/q11.sql        |  29 +++++
 sql/core/src/test/resources/tpch/q12.sql        |  30 +++++
 sql/core/src/test/resources/tpch/q13.sql        |  22 ++++
 sql/core/src/test/resources/tpch/q14.sql        |  15 +++
 sql/core/src/test/resources/tpch/q15.sql        |  35 ++++++
 sql/core/src/test/resources/tpch/q16.sql        |  32 ++++++
 sql/core/src/test/resources/tpch/q17.sql        |  19 ++++
 sql/core/src/test/resources/tpch/q18.sql        |  35 ++++++
 sql/core/src/test/resources/tpch/q19.sql        |  37 +++++++
 sql/core/src/test/resources/tpch/q2.sql         |  46 ++++++++
 sql/core/src/test/resources/tpch/q20.sql        |  39 +++++++
 sql/core/src/test/resources/tpch/q21.sql        |  42 +++++++
 sql/core/src/test/resources/tpch/q22.sql        |  39 +++++++
 sql/core/src/test/resources/tpch/q3.sql         |  25 +++++
 sql/core/src/test/resources/tpch/q4.sql         |  23 ++++
 sql/core/src/test/resources/tpch/q5.sql         |  26 +++++
 sql/core/src/test/resources/tpch/q6.sql         |  11 ++
 sql/core/src/test/resources/tpch/q7.sql         |  41 +++++++
 sql/core/src/test/resources/tpch/q8.sql         |  39 +++++++
 sql/core/src/test/resources/tpch/q9.sql         |  34 ++++++
 .../apache/spark/sql/BenchmarkQueryTest.scala   |  78 +++++++++++++
 .../org/apache/spark/sql/TPCDSQuerySuite.scala  |  58 +---------
 .../org/apache/spark/sql/TPCHQuerySuite.scala   | 110 +++++++++++++++++++
 26 files changed, 872 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
index 8a6ab30..c661e9b 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
@@ -116,9 +116,10 @@ class TextFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
     readToUnsafeMem(broadcastedHadoopConf, requiredSchema, 
textOptions.wholeText)
   }
 
-  private def readToUnsafeMem(conf: Broadcast[SerializableConfiguration],
-      requiredSchema: StructType, wholeTextMode: Boolean):
-  (PartitionedFile) => Iterator[UnsafeRow] = {
+  private def readToUnsafeMem(
+      conf: Broadcast[SerializableConfiguration],
+      requiredSchema: StructType,
+      wholeTextMode: Boolean): (PartitionedFile) => Iterator[UnsafeRow] = {
 
     (file: PartitionedFile) => {
       val confValue = conf.value.value

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q1.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q1.sql 
b/sql/core/src/test/resources/tpch/q1.sql
new file mode 100644
index 0000000..73eb8d8
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q1.sql
@@ -0,0 +1,23 @@
+-- using default substitutions
+
+select
+       l_returnflag,
+       l_linestatus,
+       sum(l_quantity) as sum_qty,
+       sum(l_extendedprice) as sum_base_price,
+       sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
+       sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
+       avg(l_quantity) as avg_qty,
+       avg(l_extendedprice) as avg_price,
+       avg(l_discount) as avg_disc,
+       count(*) as count_order
+from
+       lineitem
+where
+       l_shipdate <= date '1998-12-01' - interval '90' day
+group by
+       l_returnflag,
+       l_linestatus
+order by
+       l_returnflag,
+       l_linestatus

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q10.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q10.sql 
b/sql/core/src/test/resources/tpch/q10.sql
new file mode 100644
index 0000000..3b2ae58
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q10.sql
@@ -0,0 +1,34 @@
+-- using default substitutions
+
+select
+       c_custkey,
+       c_name,
+       sum(l_extendedprice * (1 - l_discount)) as revenue,
+       c_acctbal,
+       n_name,
+       c_address,
+       c_phone,
+       c_comment
+from
+       customer,
+       orders,
+       lineitem,
+       nation
+where
+       c_custkey = o_custkey
+       and l_orderkey = o_orderkey
+       and o_orderdate >= date '1993-10-01'
+       and o_orderdate < date '1993-10-01' + interval '3' month
+       and l_returnflag = 'R'
+       and c_nationkey = n_nationkey
+group by
+       c_custkey,
+       c_name,
+       c_acctbal,
+       c_phone,
+       n_name,
+       c_address,
+       c_comment
+order by
+       revenue desc
+limit 20

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q11.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q11.sql 
b/sql/core/src/test/resources/tpch/q11.sql
new file mode 100644
index 0000000..531e78c
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q11.sql
@@ -0,0 +1,29 @@
+-- using default substitutions
+
+select
+       ps_partkey,
+       sum(ps_supplycost * ps_availqty) as value
+from
+       partsupp,
+       supplier,
+       nation
+where
+       ps_suppkey = s_suppkey
+       and s_nationkey = n_nationkey
+       and n_name = 'GERMANY'
+group by
+       ps_partkey having
+               sum(ps_supplycost * ps_availqty) > (
+                       select
+                               sum(ps_supplycost * ps_availqty) * 0.0001000000
+                       from
+                               partsupp,
+                               supplier,
+                               nation
+                       where
+                               ps_suppkey = s_suppkey
+                               and s_nationkey = n_nationkey
+                               and n_name = 'GERMANY'
+               )
+order by
+       value desc

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q12.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q12.sql 
b/sql/core/src/test/resources/tpch/q12.sql
new file mode 100644
index 0000000..d3e70eb
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q12.sql
@@ -0,0 +1,30 @@
+-- using default substitutions
+
+select
+       l_shipmode,
+       sum(case
+               when o_orderpriority = '1-URGENT'
+                       or o_orderpriority = '2-HIGH'
+                       then 1
+               else 0
+       end) as high_line_count,
+       sum(case
+               when o_orderpriority <> '1-URGENT'
+                       and o_orderpriority <> '2-HIGH'
+                       then 1
+               else 0
+       end) as low_line_count
+from
+       orders,
+       lineitem
+where
+       o_orderkey = l_orderkey
+       and l_shipmode in ('MAIL', 'SHIP')
+       and l_commitdate < l_receiptdate
+       and l_shipdate < l_commitdate
+       and l_receiptdate >= date '1994-01-01'
+       and l_receiptdate < date '1994-01-01' + interval '1' year
+group by
+       l_shipmode
+order by
+       l_shipmode

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q13.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q13.sql 
b/sql/core/src/test/resources/tpch/q13.sql
new file mode 100644
index 0000000..3375002
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q13.sql
@@ -0,0 +1,22 @@
+-- using default substitutions
+
+select
+       c_count,
+       count(*) as custdist
+from
+       (
+               select
+                       c_custkey,
+                       count(o_orderkey) as c_count
+               from
+                       customer left outer join orders on
+                               c_custkey = o_custkey
+                               and o_comment not like '%special%requests%'
+               group by
+                       c_custkey
+       ) as c_orders
+group by
+       c_count
+order by
+       custdist desc,
+       c_count desc

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q14.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q14.sql 
b/sql/core/src/test/resources/tpch/q14.sql
new file mode 100644
index 0000000..753ea56
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q14.sql
@@ -0,0 +1,15 @@
+-- using default substitutions
+
+select
+       100.00 * sum(case
+               when p_type like 'PROMO%'
+                       then l_extendedprice * (1 - l_discount)
+               else 0
+       end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
+from
+       lineitem,
+       part
+where
+       l_partkey = p_partkey
+       and l_shipdate >= date '1995-09-01'
+       and l_shipdate < date '1995-09-01' + interval '1' month

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q15.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q15.sql 
b/sql/core/src/test/resources/tpch/q15.sql
new file mode 100644
index 0000000..64d0b48
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q15.sql
@@ -0,0 +1,35 @@
+-- using default substitutions
+
+with revenue0 as
+       (select
+               l_suppkey as supplier_no,
+               sum(l_extendedprice * (1 - l_discount)) as total_revenue
+       from
+               lineitem
+       where
+               l_shipdate >= date '1996-01-01'
+               and l_shipdate < date '1996-01-01' + interval '3' month
+       group by
+               l_suppkey)
+
+
+select
+       s_suppkey,
+       s_name,
+       s_address,
+       s_phone,
+       total_revenue
+from
+       supplier,
+       revenue0
+where
+       s_suppkey = supplier_no
+       and total_revenue = (
+               select
+                       max(total_revenue)
+               from
+                       revenue0
+       )
+order by
+       s_suppkey
+

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q16.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q16.sql 
b/sql/core/src/test/resources/tpch/q16.sql
new file mode 100644
index 0000000..a6ac688
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q16.sql
@@ -0,0 +1,32 @@
+-- using default substitutions
+
+select
+       p_brand,
+       p_type,
+       p_size,
+       count(distinct ps_suppkey) as supplier_cnt
+from
+       partsupp,
+       part
+where
+       p_partkey = ps_partkey
+       and p_brand <> 'Brand#45'
+       and p_type not like 'MEDIUM POLISHED%'
+       and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
+       and ps_suppkey not in (
+               select
+                       s_suppkey
+               from
+                       supplier
+               where
+                       s_comment like '%Customer%Complaints%'
+       )
+group by
+       p_brand,
+       p_type,
+       p_size
+order by
+       supplier_cnt desc,
+       p_brand,
+       p_type,
+       p_size

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q17.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q17.sql 
b/sql/core/src/test/resources/tpch/q17.sql
new file mode 100644
index 0000000..74fb1f6
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q17.sql
@@ -0,0 +1,19 @@
+-- using default substitutions
+
+select
+       sum(l_extendedprice) / 7.0 as avg_yearly
+from
+       lineitem,
+       part
+where
+       p_partkey = l_partkey
+       and p_brand = 'Brand#23'
+       and p_container = 'MED BOX'
+       and l_quantity < (
+               select
+                       0.2 * avg(l_quantity)
+               from
+                       lineitem
+               where
+                       l_partkey = p_partkey
+       )

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q18.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q18.sql 
b/sql/core/src/test/resources/tpch/q18.sql
new file mode 100644
index 0000000..210fba1
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q18.sql
@@ -0,0 +1,35 @@
+-- using default substitutions
+
+select
+       c_name,
+       c_custkey,
+       o_orderkey,
+       o_orderdate,
+       o_totalprice,
+       sum(l_quantity)
+from
+       customer,
+       orders,
+       lineitem
+where
+       o_orderkey in (
+               select
+                       l_orderkey
+               from
+                       lineitem
+               group by
+                       l_orderkey having
+                               sum(l_quantity) > 300
+       )
+       and c_custkey = o_custkey
+       and o_orderkey = l_orderkey
+group by
+       c_name,
+       c_custkey,
+       o_orderkey,
+       o_orderdate,
+       o_totalprice
+order by
+       o_totalprice desc,
+       o_orderdate
+limit 100
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q19.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q19.sql 
b/sql/core/src/test/resources/tpch/q19.sql
new file mode 100644
index 0000000..c07327d
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q19.sql
@@ -0,0 +1,37 @@
+-- using default substitutions
+
+select
+       sum(l_extendedprice* (1 - l_discount)) as revenue
+from
+       lineitem,
+       part
+where
+       (
+               p_partkey = l_partkey
+               and p_brand = 'Brand#12'
+               and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
+               and l_quantity >= 1 and l_quantity <= 1 + 10
+               and p_size between 1 and 5
+               and l_shipmode in ('AIR', 'AIR REG')
+               and l_shipinstruct = 'DELIVER IN PERSON'
+       )
+       or
+       (
+               p_partkey = l_partkey
+               and p_brand = 'Brand#23'
+               and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
+               and l_quantity >= 10 and l_quantity <= 10 + 10
+               and p_size between 1 and 10
+               and l_shipmode in ('AIR', 'AIR REG')
+               and l_shipinstruct = 'DELIVER IN PERSON'
+       )
+       or
+       (
+               p_partkey = l_partkey
+               and p_brand = 'Brand#34'
+               and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
+               and l_quantity >= 20 and l_quantity <= 20 + 10
+               and p_size between 1 and 15
+               and l_shipmode in ('AIR', 'AIR REG')
+               and l_shipinstruct = 'DELIVER IN PERSON'
+       )

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q2.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q2.sql 
b/sql/core/src/test/resources/tpch/q2.sql
new file mode 100644
index 0000000..d0e3b7e
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q2.sql
@@ -0,0 +1,46 @@
+-- using default substitutions
+
+select
+       s_acctbal,
+       s_name,
+       n_name,
+       p_partkey,
+       p_mfgr,
+       s_address,
+       s_phone,
+       s_comment
+from
+       part,
+       supplier,
+       partsupp,
+       nation,
+       region
+where
+       p_partkey = ps_partkey
+       and s_suppkey = ps_suppkey
+       and p_size = 15
+       and p_type like '%BRASS'
+       and s_nationkey = n_nationkey
+       and n_regionkey = r_regionkey
+       and r_name = 'EUROPE'
+       and ps_supplycost = (
+               select
+                       min(ps_supplycost)
+               from
+                       partsupp,
+                       supplier,
+                       nation,
+                       region
+               where
+                       p_partkey = ps_partkey
+                       and s_suppkey = ps_suppkey
+                       and s_nationkey = n_nationkey
+                       and n_regionkey = r_regionkey
+                       and r_name = 'EUROPE'
+       )
+order by
+       s_acctbal desc,
+       n_name,
+       s_name,
+       p_partkey
+limit 100

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q20.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q20.sql 
b/sql/core/src/test/resources/tpch/q20.sql
new file mode 100644
index 0000000..e161d34
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q20.sql
@@ -0,0 +1,39 @@
+-- using default substitutions
+
+select
+       s_name,
+       s_address
+from
+       supplier,
+       nation
+where
+       s_suppkey in (
+               select
+                       ps_suppkey
+               from
+                       partsupp
+               where
+                       ps_partkey in (
+                               select
+                                       p_partkey
+                               from
+                                       part
+                               where
+                                       p_name like 'forest%'
+                       )
+                       and ps_availqty > (
+                               select
+                                       0.5 * sum(l_quantity)
+                               from
+                                       lineitem
+                               where
+                                       l_partkey = ps_partkey
+                                       and l_suppkey = ps_suppkey
+                                       and l_shipdate >= date '1994-01-01'
+                                       and l_shipdate < date '1994-01-01' + 
interval '1' year
+                       )
+       )
+       and s_nationkey = n_nationkey
+       and n_name = 'CANADA'
+order by
+       s_name

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q21.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q21.sql 
b/sql/core/src/test/resources/tpch/q21.sql
new file mode 100644
index 0000000..fdcdfbc
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q21.sql
@@ -0,0 +1,42 @@
+-- using default substitutions
+
+select
+       s_name,
+       count(*) as numwait
+from
+       supplier,
+       lineitem l1,
+       orders,
+       nation
+where
+       s_suppkey = l1.l_suppkey
+       and o_orderkey = l1.l_orderkey
+       and o_orderstatus = 'F'
+       and l1.l_receiptdate > l1.l_commitdate
+       and exists (
+               select
+                       *
+               from
+                       lineitem l2
+               where
+                       l2.l_orderkey = l1.l_orderkey
+                       and l2.l_suppkey <> l1.l_suppkey
+       )
+       and not exists (
+               select
+                       *
+               from
+                       lineitem l3
+               where
+                       l3.l_orderkey = l1.l_orderkey
+                       and l3.l_suppkey <> l1.l_suppkey
+                       and l3.l_receiptdate > l3.l_commitdate
+       )
+       and s_nationkey = n_nationkey
+       and n_name = 'SAUDI ARABIA'
+group by
+       s_name
+order by
+       numwait desc,
+       s_name
+limit 100
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q22.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q22.sql 
b/sql/core/src/test/resources/tpch/q22.sql
new file mode 100644
index 0000000..1d7706e
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q22.sql
@@ -0,0 +1,39 @@
+-- using default substitutions
+
+select
+       cntrycode,
+       count(*) as numcust,
+       sum(c_acctbal) as totacctbal
+from
+       (
+               select
+                       substring(c_phone, 1, 2) as cntrycode,
+                       c_acctbal
+               from
+                       customer
+               where
+                       substring(c_phone, 1, 2) in
+                               ('13', '31', '23', '29', '30', '18', '17')
+                       and c_acctbal > (
+                               select
+                                       avg(c_acctbal)
+                               from
+                                       customer
+                               where
+                                       c_acctbal > 0.00
+                                       and substring(c_phone, 1, 2) in
+                                               ('13', '31', '23', '29', '30', 
'18', '17')
+                       )
+                       and not exists (
+                               select
+                                       *
+                               from
+                                       orders
+                               where
+                                       o_custkey = c_custkey
+                       )
+       ) as custsale
+group by
+       cntrycode
+order by
+       cntrycode

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q3.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q3.sql 
b/sql/core/src/test/resources/tpch/q3.sql
new file mode 100644
index 0000000..948d6bc
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q3.sql
@@ -0,0 +1,25 @@
+-- using default substitutions
+
+select
+       l_orderkey,
+       sum(l_extendedprice * (1 - l_discount)) as revenue,
+       o_orderdate,
+       o_shippriority
+from
+       customer,
+       orders,
+       lineitem
+where
+       c_mktsegment = 'BUILDING'
+       and c_custkey = o_custkey
+       and l_orderkey = o_orderkey
+       and o_orderdate < date '1995-03-15'
+       and l_shipdate > date '1995-03-15'
+group by
+       l_orderkey,
+       o_orderdate,
+       o_shippriority
+order by
+       revenue desc,
+       o_orderdate
+limit 10

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q4.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q4.sql 
b/sql/core/src/test/resources/tpch/q4.sql
new file mode 100644
index 0000000..67330e3
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q4.sql
@@ -0,0 +1,23 @@
+-- using default substitutions
+
+select
+       o_orderpriority,
+       count(*) as order_count
+from
+       orders
+where
+       o_orderdate >= date '1993-07-01'
+       and o_orderdate < date '1993-07-01' + interval '3' month
+       and exists (
+               select
+                       *
+               from
+                       lineitem
+               where
+                       l_orderkey = o_orderkey
+                       and l_commitdate < l_receiptdate
+       )
+group by
+       o_orderpriority
+order by
+       o_orderpriority

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q5.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q5.sql 
b/sql/core/src/test/resources/tpch/q5.sql
new file mode 100644
index 0000000..b973e9f
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q5.sql
@@ -0,0 +1,26 @@
+-- using default substitutions
+
+select
+       n_name,
+       sum(l_extendedprice * (1 - l_discount)) as revenue
+from
+       customer,
+       orders,
+       lineitem,
+       supplier,
+       nation,
+       region
+where
+       c_custkey = o_custkey
+       and l_orderkey = o_orderkey
+       and l_suppkey = s_suppkey
+       and c_nationkey = s_nationkey
+       and s_nationkey = n_nationkey
+       and n_regionkey = r_regionkey
+       and r_name = 'ASIA'
+       and o_orderdate >= date '1994-01-01'
+       and o_orderdate < date '1994-01-01' + interval '1' year
+group by
+       n_name
+order by
+       revenue desc

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q6.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q6.sql 
b/sql/core/src/test/resources/tpch/q6.sql
new file mode 100644
index 0000000..2229457
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q6.sql
@@ -0,0 +1,11 @@
+-- using default substitutions
+
+select
+       sum(l_extendedprice * l_discount) as revenue
+from
+       lineitem
+where
+       l_shipdate >= date '1994-01-01'
+       and l_shipdate < date '1994-01-01' + interval '1' year
+       and l_discount between .06 - 0.01 and .06 + 0.01
+       and l_quantity < 24

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q7.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q7.sql 
b/sql/core/src/test/resources/tpch/q7.sql
new file mode 100644
index 0000000..21105c0
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q7.sql
@@ -0,0 +1,41 @@
+-- using default substitutions
+
+select
+       supp_nation,
+       cust_nation,
+       l_year,
+       sum(volume) as revenue
+from
+       (
+               select
+                       n1.n_name as supp_nation,
+                       n2.n_name as cust_nation,
+                       year(l_shipdate) as l_year,
+                       l_extendedprice * (1 - l_discount) as volume
+               from
+                       supplier,
+                       lineitem,
+                       orders,
+                       customer,
+                       nation n1,
+                       nation n2
+               where
+                       s_suppkey = l_suppkey
+                       and o_orderkey = l_orderkey
+                       and c_custkey = o_custkey
+                       and s_nationkey = n1.n_nationkey
+                       and c_nationkey = n2.n_nationkey
+                       and (
+                               (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
+                               or (n1.n_name = 'GERMANY' and n2.n_name = 
'FRANCE')
+                       )
+                       and l_shipdate between date '1995-01-01' and date 
'1996-12-31'
+       ) as shipping
+group by
+       supp_nation,
+       cust_nation,
+       l_year
+order by
+       supp_nation,
+       cust_nation,
+       l_year

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q8.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q8.sql 
b/sql/core/src/test/resources/tpch/q8.sql
new file mode 100644
index 0000000..81d8187
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q8.sql
@@ -0,0 +1,39 @@
+-- using default substitutions
+
+select
+       o_year,
+       sum(case
+               when nation = 'BRAZIL' then volume
+               else 0
+       end) / sum(volume) as mkt_share
+from
+       (
+               select
+                       year(o_orderdate) as o_year,
+                       l_extendedprice * (1 - l_discount) as volume,
+                       n2.n_name as nation
+               from
+                       part,
+                       supplier,
+                       lineitem,
+                       orders,
+                       customer,
+                       nation n1,
+                       nation n2,
+                       region
+               where
+                       p_partkey = l_partkey
+                       and s_suppkey = l_suppkey
+                       and l_orderkey = o_orderkey
+                       and o_custkey = c_custkey
+                       and c_nationkey = n1.n_nationkey
+                       and n1.n_regionkey = r_regionkey
+                       and r_name = 'AMERICA'
+                       and s_nationkey = n2.n_nationkey
+                       and o_orderdate between date '1995-01-01' and date 
'1996-12-31'
+                       and p_type = 'ECONOMY ANODIZED STEEL'
+       ) as all_nations
+group by
+       o_year
+order by
+       o_year

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/resources/tpch/q9.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpch/q9.sql 
b/sql/core/src/test/resources/tpch/q9.sql
new file mode 100644
index 0000000..a4e8e83
--- /dev/null
+++ b/sql/core/src/test/resources/tpch/q9.sql
@@ -0,0 +1,34 @@
+-- using default substitutions
+
+select
+       nation,
+       o_year,
+       sum(amount) as sum_profit
+from
+       (
+               select
+                       n_name as nation,
+                       year(o_orderdate) as o_year,
+                       l_extendedprice * (1 - l_discount) - ps_supplycost * 
l_quantity as amount
+               from
+                       part,
+                       supplier,
+                       lineitem,
+                       partsupp,
+                       orders,
+                       nation
+               where
+                       s_suppkey = l_suppkey
+                       and ps_suppkey = l_suppkey
+                       and ps_partkey = l_partkey
+                       and p_partkey = l_partkey
+                       and o_orderkey = l_orderkey
+                       and s_nationkey = n_nationkey
+                       and p_name like '%green%'
+       ) as profit
+group by
+       nation,
+       o_year
+order by
+       nation,
+       o_year desc

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
new file mode 100644
index 0000000..7037749
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, 
CodeGenerator}
+import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
+import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.util.Utils
+
+abstract class BenchmarkQueryTest extends QueryTest with SharedSQLContext with 
BeforeAndAfterAll {
+
+  // When Utils.isTesting is true, the RuleExecutor will issue an exception 
when hitting
+  // the max iteration of analyzer/optimizer batches.
+  assert(Utils.isTesting, "spark.testing is not set to true")
+
+  /**
+   * Drop all the tables
+   */
+  protected override def afterAll(): Unit = {
+    try {
+      // For debugging dump some statistics about how much time was spent in 
various optimizer rules
+      logWarning(RuleExecutor.dumpTimeSpent())
+      spark.sessionState.catalog.reset()
+    } finally {
+      super.afterAll()
+    }
+  }
+
+  override def beforeAll() {
+    super.beforeAll()
+    RuleExecutor.resetTime()
+  }
+
+  protected def checkGeneratedCode(plan: SparkPlan): Unit = {
+    val codegenSubtrees = new 
collection.mutable.HashSet[WholeStageCodegenExec]()
+    plan foreach {
+      case s: WholeStageCodegenExec =>
+        codegenSubtrees += s
+      case s => s
+    }
+    codegenSubtrees.toSeq.foreach { subtree =>
+      val code = subtree.doCodeGen()._2
+      try {
+        // Just check the generated code can be properly compiled
+        CodeGenerator.compile(code)
+      } catch {
+        case e: Exception =>
+          val msg =
+            s"""
+               |failed to compile:
+               |Subtree:
+               |$subtree
+               |Generated code:
+               |${CodeFormatter.format(code)}
+             """.stripMargin
+          throw new Exception(msg, e)
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
index dd427a5..1a58418 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
@@ -17,41 +17,18 @@
 
 package org.apache.spark.sql
 
-import org.scalatest.BeforeAndAfterAll
-
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, 
CodeGenerator}
-import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.util.resourceToString
-import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.util.Utils
 
 /**
- * This test suite ensures all the TPC-DS queries can be successfully analyzed 
and optimized
- * without hitting the max iteration threshold.
+ * This test suite ensures all the TPC-DS queries can be successfully 
analyzed, optimized
+ * and compiled without hitting the max iteration threshold.
  */
-class TPCDSQuerySuite extends QueryTest with SharedSQLContext with 
BeforeAndAfterAll {
-
-  // When Utils.isTesting is true, the RuleExecutor will issue an exception 
when hitting
-  // the max iteration of analyzer/optimizer batches.
-  assert(Utils.isTesting, "spark.testing is not set to true")
-
-  /**
-   * Drop all the tables
-   */
-  protected override def afterAll(): Unit = {
-    try {
-      // For debugging dump some statistics about how much time was spent in 
various optimizer rules
-      logWarning(RuleExecutor.dumpTimeSpent())
-      spark.sessionState.catalog.reset()
-    } finally {
-      super.afterAll()
-    }
-  }
+class TPCDSQuerySuite extends BenchmarkQueryTest {
 
   override def beforeAll() {
     super.beforeAll()
+
     sql(
       """
         |CREATE TABLE `catalog_page` (
@@ -350,33 +327,6 @@ class TPCDSQuerySuite extends QueryTest with 
SharedSQLContext with BeforeAndAfte
     "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
     "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
 
-  private def checkGeneratedCode(plan: SparkPlan): Unit = {
-    val codegenSubtrees = new 
collection.mutable.HashSet[WholeStageCodegenExec]()
-    plan foreach {
-      case s: WholeStageCodegenExec =>
-        codegenSubtrees += s
-      case s => s
-    }
-    codegenSubtrees.toSeq.foreach { subtree =>
-      val code = subtree.doCodeGen()._2
-      try {
-        // Just check the generated code can be properly compiled
-        CodeGenerator.compile(code)
-      } catch {
-        case e: Exception =>
-          val msg =
-            s"""
-               |failed to compile:
-               |Subtree:
-               |$subtree
-               |Generated code:
-               |${CodeFormatter.format(code)}
-             """.stripMargin
-          throw new Exception(msg, e)
-      }
-    }
-  }
-
   tpcdsQueries.foreach { name =>
     val queryString = resourceToString(s"tpcds/$name.sql",
       classLoader = Thread.currentThread().getContextClassLoader)

http://git-wip-us.apache.org/repos/asf/spark/blob/3fea5c4f/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala
new file mode 100644
index 0000000..69ac92e
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.catalyst.util.resourceToString
+import org.apache.spark.util.Utils
+
+/**
+ * This test suite ensures all the TPC-H queries can be successfully analyzed, 
optimized
+ * and compiled without hitting the max iteration threshold.
+ */
+class TPCHQuerySuite extends BenchmarkQueryTest {
+
+  override def beforeAll() {
+    super.beforeAll()
+
+    sql(
+      """
+        |CREATE TABLE `orders` (
+        |`o_orderkey` BIGINT, `o_custkey` BIGINT, `o_orderstatus` STRING,
+        |`o_totalprice` DECIMAL(10,0), `o_orderdate` DATE, `o_orderpriority` 
STRING,
+        |`o_clerk` STRING, `o_shippriority` INT, `o_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `nation` (
+        |`n_nationkey` BIGINT, `n_name` STRING, `n_regionkey` BIGINT, 
`n_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `region` (
+        |`r_regionkey` BIGINT, `r_name` STRING, `r_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `part` (`p_partkey` BIGINT, `p_name` STRING, `p_mfgr` 
STRING,
+        |`p_brand` STRING, `p_type` STRING, `p_size` INT, `p_container` STRING,
+        |`p_retailprice` DECIMAL(10,0), `p_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `partsupp` (`ps_partkey` BIGINT, `ps_suppkey` BIGINT,
+        |`ps_availqty` INT, `ps_supplycost` DECIMAL(10,0), `ps_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `customer` (`c_custkey` BIGINT, `c_name` STRING, 
`c_address` STRING,
+        |`c_nationkey` STRING, `c_phone` STRING, `c_acctbal` DECIMAL(10,0),
+        |`c_mktsegment` STRING, `c_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `supplier` (`s_suppkey` BIGINT, `s_name` STRING, 
`s_address` STRING,
+        |`s_nationkey` BIGINT, `s_phone` STRING, `s_acctbal` DECIMAL(10,0), 
`s_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `lineitem` (`l_orderkey` BIGINT, `l_partkey` BIGINT, 
`l_suppkey` BIGINT,
+        |`l_linenumber` INT, `l_quantity` DECIMAL(10,0), `l_extendedprice` 
DECIMAL(10,0),
+        |`l_discount` DECIMAL(10,0), `l_tax` DECIMAL(10,0), `l_returnflag` 
STRING,
+        |`l_linestatus` STRING, `l_shipdate` DATE, `l_commitdate` DATE, 
`l_receiptdate` DATE,
+        |`l_shipinstruct` STRING, `l_shipmode` STRING, `l_comment` STRING)
+        |USING parquet
+      """.stripMargin)
+  }
+
+  val tpchQueries = Seq(
+    "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
+    "q12", "q13", "q14", "q15", "q16", "q17", "q18", "q19", "q20", "q21", 
"q22")
+
+  tpchQueries.foreach { name =>
+    val queryString = resourceToString(s"tpch/$name.sql",
+      classLoader = Thread.currentThread().getContextClassLoader)
+    test(name) {
+      // check the plans can be properly generated
+      val plan = sql(queryString).queryExecution.executedPlan
+      checkGeneratedCode(plan)
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to