Repository: spark
Updated Branches:
  refs/heads/branch-2.2 8b2d8385c -> ac9a0f692


[SPARK-22161][SQL] Add Impala-modified TPC-DS queries

## What changes were proposed in this pull request?

Added IMPALA-modified TPCDS queries to TPC-DS query suites.

- Ref: https://github.com/cloudera/impala-tpcds-kit/tree/master/queries

## How was this patch tested?
N/A

Author: gatorsmile <gatorsm...@gmail.com>

Closes #19386 from gatorsmile/addImpalaQueries.

(cherry picked from commit 9ed7394a68315126b2dd00e53a444cc65b5a62ea)
Signed-off-by: gatorsmile <gatorsm...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac9a0f69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac9a0f69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac9a0f69

Branch: refs/heads/branch-2.2
Commit: ac9a0f6923a72ec8f92fe88760cf50a67497b666
Parents: 8b2d838
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Fri Sep 29 08:59:42 2017 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Fri Sep 29 09:00:15 2017 -0700

----------------------------------------------------------------------
 .../resources/tpcds-modifiedQueries/q10.sql     |  70 ++++++
 .../resources/tpcds-modifiedQueries/q19.sql     |  38 ++++
 .../resources/tpcds-modifiedQueries/q27.sql     |  43 ++++
 .../test/resources/tpcds-modifiedQueries/q3.sql | 228 +++++++++++++++++++
 .../resources/tpcds-modifiedQueries/q34.sql     |  45 ++++
 .../resources/tpcds-modifiedQueries/q42.sql     |  28 +++
 .../resources/tpcds-modifiedQueries/q43.sql     |  36 +++
 .../resources/tpcds-modifiedQueries/q46.sql     |  80 +++++++
 .../resources/tpcds-modifiedQueries/q52.sql     |  27 +++
 .../resources/tpcds-modifiedQueries/q53.sql     |  37 +++
 .../resources/tpcds-modifiedQueries/q55.sql     |  24 ++
 .../resources/tpcds-modifiedQueries/q59.sql     |  83 +++++++
 .../resources/tpcds-modifiedQueries/q63.sql     |  29 +++
 .../resources/tpcds-modifiedQueries/q65.sql     |  58 +++++
 .../resources/tpcds-modifiedQueries/q68.sql     |  62 +++++
 .../test/resources/tpcds-modifiedQueries/q7.sql |  31 +++
 .../resources/tpcds-modifiedQueries/q73.sql     |  49 ++++
 .../resources/tpcds-modifiedQueries/q79.sql     |  59 +++++
 .../resources/tpcds-modifiedQueries/q89.sql     |  43 ++++
 .../resources/tpcds-modifiedQueries/q98.sql     |  32 +++
 .../resources/tpcds-modifiedQueries/ss_max.sql  |  14 ++
 .../org/apache/spark/sql/TPCDSQuerySuite.scala  |  26 ++-
 22 files changed, 1141 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
new file mode 100755
index 0000000..79dd3d5
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
@@ -0,0 +1,70 @@
+-- start query 10 in stream 0 using template query10.tpl
+with 
+v1 as (
+  select 
+     ws_bill_customer_sk as customer_sk
+  from web_sales,
+       date_dim
+  where ws_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+  union all
+  select 
+    cs_ship_customer_sk as customer_sk
+  from catalog_sales,
+       date_dim 
+  where cs_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+),
+v2 as (
+  select 
+    ss_customer_sk as customer_sk
+  from store_sales,
+       date_dim
+  where ss_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3 
+)
+select
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3,
+  cd_dep_count,
+  count(*) cnt4,
+  cd_dep_employed_count,
+  count(*) cnt5,
+  cd_dep_college_count,
+  count(*) cnt6
+from customer c
+join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk)
+join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk) 
+left semi join v1 on (v1.customer_sk = c.c_customer_sk) 
+left semi join v2 on (v2.customer_sk = c.c_customer_sk)
+where 
+  ca_county in ('Walker County','Richland County','Gaines County','Douglas 
County','Dona Ana County')
+group by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+order by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+limit 100
+-- end query 10 in stream 0 using template query10.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql
new file mode 100755
index 0000000..1799827
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql
@@ -0,0 +1,38 @@
+-- start query 19 in stream 0 using template query19.tpl
+select
+  i_brand_id brand_id,
+  i_brand brand,
+  i_manufact_id,
+  i_manufact,
+  sum(ss_ext_sales_price) ext_price
+from
+  date_dim,
+  store_sales,
+  item,
+  customer,
+  customer_address,
+  store
+where
+  d_date_sk = ss_sold_date_sk
+  and ss_item_sk = i_item_sk
+  and i_manager_id = 7
+  and d_moy = 11
+  and d_year = 1999
+  and ss_customer_sk = c_customer_sk
+  and c_current_addr_sk = ca_address_sk
+  and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
+  and ss_store_sk = s_store_sk
+  and ss_sold_date_sk between 2451484 and 2451513  -- partition key filter
+group by
+  i_brand,
+  i_brand_id,
+  i_manufact_id,
+  i_manufact
+order by
+  ext_price desc,
+  i_brand,
+  i_brand_id,
+  i_manufact_id,
+  i_manufact
+limit 100
+-- end query 19 in stream 0 using template query19.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q27.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q27.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q27.sql
new file mode 100755
index 0000000..dedbc62
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q27.sql
@@ -0,0 +1,43 @@
+-- start query 27 in stream 0 using template query27.tpl
+ with results as
+ (select i_item_id,
+        s_state,
+        ss_quantity agg1,
+        ss_list_price agg2,
+        ss_coupon_amt agg3,
+        ss_sales_price agg4
+        --0 as g_state,
+        --avg(ss_quantity) agg1,
+        --avg(ss_list_price) agg2,
+        --avg(ss_coupon_amt) agg3,
+        --avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+       ss_sold_date_sk between 2451545 and 2451910 and
+       ss_item_sk = i_item_sk and
+       ss_store_sk = s_store_sk and
+       ss_cdemo_sk = cd_demo_sk and
+       cd_gender = 'F' and
+       cd_marital_status = 'D' and
+       cd_education_status = 'Primary' and
+       d_year = 2000 and
+       s_state in ('TN','AL', 'SD', 'SD', 'SD', 'SD')
+ --group by i_item_id, s_state
+ )
+
+ select i_item_id,
+  s_state, g_state, agg1, agg2, agg3, agg4
+   from (
+        select i_item_id, s_state, 0 as g_state, avg(agg1) agg1, avg(agg2) 
agg2, avg(agg3) agg3, avg(agg4) agg4 from results
+        group by i_item_id, s_state
+         union all
+        select i_item_id, NULL AS s_state, 1 AS g_state, avg(agg1) agg1, 
avg(agg2) agg2, avg(agg3) agg3,
+         avg(agg4) agg4 from results
+        group by i_item_id
+         union all
+        select NULL AS i_item_id, NULL as s_state, 1 as g_state, avg(agg1) 
agg1, avg(agg2) agg2, avg(agg3) agg3,
+         avg(agg4) agg4 from results
+        ) foo
+  order by i_item_id, s_state
+ limit 100
+-- end query 27 in stream 0 using template query27.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q3.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q3.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q3.sql
new file mode 100755
index 0000000..35b0a20
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q3.sql
@@ -0,0 +1,228 @@
+-- start query 3 in stream 0 using template query3.tpl
+select
+  dt.d_year,
+  item.i_brand_id brand_id,
+  item.i_brand brand,
+  sum(ss_net_profit) sum_agg
+from
+  date_dim dt,
+  store_sales,
+  item
+where
+  dt.d_date_sk = store_sales.ss_sold_date_sk
+  and store_sales.ss_item_sk = item.i_item_sk
+  and item.i_manufact_id = 436
+  and dt.d_moy = 12
+  -- partition key filters
+  and ( 
+ss_sold_date_sk between 2415355 and 2415385
+or ss_sold_date_sk between 2415720 and 2415750
+or ss_sold_date_sk between 2416085 and 2416115
+or ss_sold_date_sk between 2416450 and 2416480
+or ss_sold_date_sk between 2416816 and 2416846
+or ss_sold_date_sk between 2417181 and 2417211
+or ss_sold_date_sk between 2417546 and 2417576
+or ss_sold_date_sk between 2417911 and 2417941
+or ss_sold_date_sk between 2418277 and 2418307
+or ss_sold_date_sk between 2418642 and 2418672
+or ss_sold_date_sk between 2419007 and 2419037
+or ss_sold_date_sk between 2419372 and 2419402
+or ss_sold_date_sk between 2419738 and 2419768
+or ss_sold_date_sk between 2420103 and 2420133
+or ss_sold_date_sk between 2420468 and 2420498
+or ss_sold_date_sk between 2420833 and 2420863
+or ss_sold_date_sk between 2421199 and 2421229
+or ss_sold_date_sk between 2421564 and 2421594
+or ss_sold_date_sk between 2421929 and 2421959
+or ss_sold_date_sk between 2422294 and 2422324
+or ss_sold_date_sk between 2422660 and 2422690
+or ss_sold_date_sk between 2423025 and 2423055
+or ss_sold_date_sk between 2423390 and 2423420
+or ss_sold_date_sk between 2423755 and 2423785
+or ss_sold_date_sk between 2424121 and 2424151
+or ss_sold_date_sk between 2424486 and 2424516
+or ss_sold_date_sk between 2424851 and 2424881
+or ss_sold_date_sk between 2425216 and 2425246
+or ss_sold_date_sk between 2425582 and 2425612
+or ss_sold_date_sk between 2425947 and 2425977
+or ss_sold_date_sk between 2426312 and 2426342
+or ss_sold_date_sk between 2426677 and 2426707
+or ss_sold_date_sk between 2427043 and 2427073
+or ss_sold_date_sk between 2427408 and 2427438
+or ss_sold_date_sk between 2427773 and 2427803
+or ss_sold_date_sk between 2428138 and 2428168
+or ss_sold_date_sk between 2428504 and 2428534
+or ss_sold_date_sk between 2428869 and 2428899
+or ss_sold_date_sk between 2429234 and 2429264
+or ss_sold_date_sk between 2429599 and 2429629
+or ss_sold_date_sk between 2429965 and 2429995
+or ss_sold_date_sk between 2430330 and 2430360
+or ss_sold_date_sk between 2430695 and 2430725
+or ss_sold_date_sk between 2431060 and 2431090
+or ss_sold_date_sk between 2431426 and 2431456
+or ss_sold_date_sk between 2431791 and 2431821
+or ss_sold_date_sk between 2432156 and 2432186
+or ss_sold_date_sk between 2432521 and 2432551
+or ss_sold_date_sk between 2432887 and 2432917
+or ss_sold_date_sk between 2433252 and 2433282
+or ss_sold_date_sk between 2433617 and 2433647
+or ss_sold_date_sk between 2433982 and 2434012
+or ss_sold_date_sk between 2434348 and 2434378
+or ss_sold_date_sk between 2434713 and 2434743
+or ss_sold_date_sk between 2435078 and 2435108
+or ss_sold_date_sk between 2435443 and 2435473
+or ss_sold_date_sk between 2435809 and 2435839
+or ss_sold_date_sk between 2436174 and 2436204
+or ss_sold_date_sk between 2436539 and 2436569
+or ss_sold_date_sk between 2436904 and 2436934
+or ss_sold_date_sk between 2437270 and 2437300
+or ss_sold_date_sk between 2437635 and 2437665
+or ss_sold_date_sk between 2438000 and 2438030
+or ss_sold_date_sk between 2438365 and 2438395
+or ss_sold_date_sk between 2438731 and 2438761
+or ss_sold_date_sk between 2439096 and 2439126
+or ss_sold_date_sk between 2439461 and 2439491
+or ss_sold_date_sk between 2439826 and 2439856
+or ss_sold_date_sk between 2440192 and 2440222
+or ss_sold_date_sk between 2440557 and 2440587
+or ss_sold_date_sk between 2440922 and 2440952
+or ss_sold_date_sk between 2441287 and 2441317
+or ss_sold_date_sk between 2441653 and 2441683
+or ss_sold_date_sk between 2442018 and 2442048
+or ss_sold_date_sk between 2442383 and 2442413
+or ss_sold_date_sk between 2442748 and 2442778
+or ss_sold_date_sk between 2443114 and 2443144
+or ss_sold_date_sk between 2443479 and 2443509
+or ss_sold_date_sk between 2443844 and 2443874
+or ss_sold_date_sk between 2444209 and 2444239
+or ss_sold_date_sk between 2444575 and 2444605
+or ss_sold_date_sk between 2444940 and 2444970
+or ss_sold_date_sk between 2445305 and 2445335
+or ss_sold_date_sk between 2445670 and 2445700
+or ss_sold_date_sk between 2446036 and 2446066
+or ss_sold_date_sk between 2446401 and 2446431
+or ss_sold_date_sk between 2446766 and 2446796
+or ss_sold_date_sk between 2447131 and 2447161
+or ss_sold_date_sk between 2447497 and 2447527
+or ss_sold_date_sk between 2447862 and 2447892
+or ss_sold_date_sk between 2448227 and 2448257
+or ss_sold_date_sk between 2448592 and 2448622
+or ss_sold_date_sk between 2448958 and 2448988
+or ss_sold_date_sk between 2449323 and 2449353
+or ss_sold_date_sk between 2449688 and 2449718
+or ss_sold_date_sk between 2450053 and 2450083
+or ss_sold_date_sk between 2450419 and 2450449
+or ss_sold_date_sk between 2450784 and 2450814
+or ss_sold_date_sk between 2451149 and 2451179
+or ss_sold_date_sk between 2451514 and 2451544
+or ss_sold_date_sk between 2451880 and 2451910
+or ss_sold_date_sk between 2452245 and 2452275
+or ss_sold_date_sk between 2452610 and 2452640
+or ss_sold_date_sk between 2452975 and 2453005
+or ss_sold_date_sk between 2453341 and 2453371
+or ss_sold_date_sk between 2453706 and 2453736
+or ss_sold_date_sk between 2454071 and 2454101
+or ss_sold_date_sk between 2454436 and 2454466
+or ss_sold_date_sk between 2454802 and 2454832
+or ss_sold_date_sk between 2455167 and 2455197
+or ss_sold_date_sk between 2455532 and 2455562
+or ss_sold_date_sk between 2455897 and 2455927
+or ss_sold_date_sk between 2456263 and 2456293
+or ss_sold_date_sk between 2456628 and 2456658
+or ss_sold_date_sk between 2456993 and 2457023
+or ss_sold_date_sk between 2457358 and 2457388
+or ss_sold_date_sk between 2457724 and 2457754
+or ss_sold_date_sk between 2458089 and 2458119
+or ss_sold_date_sk between 2458454 and 2458484
+or ss_sold_date_sk between 2458819 and 2458849
+or ss_sold_date_sk between 2459185 and 2459215
+or ss_sold_date_sk between 2459550 and 2459580
+or ss_sold_date_sk between 2459915 and 2459945
+or ss_sold_date_sk between 2460280 and 2460310
+or ss_sold_date_sk between 2460646 and 2460676
+or ss_sold_date_sk between 2461011 and 2461041
+or ss_sold_date_sk between 2461376 and 2461406
+or ss_sold_date_sk between 2461741 and 2461771
+or ss_sold_date_sk between 2462107 and 2462137
+or ss_sold_date_sk between 2462472 and 2462502
+or ss_sold_date_sk between 2462837 and 2462867
+or ss_sold_date_sk between 2463202 and 2463232
+or ss_sold_date_sk between 2463568 and 2463598
+or ss_sold_date_sk between 2463933 and 2463963
+or ss_sold_date_sk between 2464298 and 2464328
+or ss_sold_date_sk between 2464663 and 2464693
+or ss_sold_date_sk between 2465029 and 2465059
+or ss_sold_date_sk between 2465394 and 2465424
+or ss_sold_date_sk between 2465759 and 2465789
+or ss_sold_date_sk between 2466124 and 2466154
+or ss_sold_date_sk between 2466490 and 2466520
+or ss_sold_date_sk between 2466855 and 2466885
+or ss_sold_date_sk between 2467220 and 2467250
+or ss_sold_date_sk between 2467585 and 2467615
+or ss_sold_date_sk between 2467951 and 2467981
+or ss_sold_date_sk between 2468316 and 2468346
+or ss_sold_date_sk between 2468681 and 2468711
+or ss_sold_date_sk between 2469046 and 2469076
+or ss_sold_date_sk between 2469412 and 2469442
+or ss_sold_date_sk between 2469777 and 2469807
+or ss_sold_date_sk between 2470142 and 2470172
+or ss_sold_date_sk between 2470507 and 2470537
+or ss_sold_date_sk between 2470873 and 2470903
+or ss_sold_date_sk between 2471238 and 2471268
+or ss_sold_date_sk between 2471603 and 2471633
+or ss_sold_date_sk between 2471968 and 2471998
+or ss_sold_date_sk between 2472334 and 2472364
+or ss_sold_date_sk between 2472699 and 2472729
+or ss_sold_date_sk between 2473064 and 2473094
+or ss_sold_date_sk between 2473429 and 2473459
+or ss_sold_date_sk between 2473795 and 2473825
+or ss_sold_date_sk between 2474160 and 2474190
+or ss_sold_date_sk between 2474525 and 2474555
+or ss_sold_date_sk between 2474890 and 2474920
+or ss_sold_date_sk between 2475256 and 2475286
+or ss_sold_date_sk between 2475621 and 2475651
+or ss_sold_date_sk between 2475986 and 2476016
+or ss_sold_date_sk between 2476351 and 2476381
+or ss_sold_date_sk between 2476717 and 2476747
+or ss_sold_date_sk between 2477082 and 2477112
+or ss_sold_date_sk between 2477447 and 2477477
+or ss_sold_date_sk between 2477812 and 2477842
+or ss_sold_date_sk between 2478178 and 2478208
+or ss_sold_date_sk between 2478543 and 2478573
+or ss_sold_date_sk between 2478908 and 2478938
+or ss_sold_date_sk between 2479273 and 2479303
+or ss_sold_date_sk between 2479639 and 2479669
+or ss_sold_date_sk between 2480004 and 2480034
+or ss_sold_date_sk between 2480369 and 2480399
+or ss_sold_date_sk between 2480734 and 2480764
+or ss_sold_date_sk between 2481100 and 2481130
+or ss_sold_date_sk between 2481465 and 2481495
+or ss_sold_date_sk between 2481830 and 2481860
+or ss_sold_date_sk between 2482195 and 2482225
+or ss_sold_date_sk between 2482561 and 2482591
+or ss_sold_date_sk between 2482926 and 2482956
+or ss_sold_date_sk between 2483291 and 2483321
+or ss_sold_date_sk between 2483656 and 2483686
+or ss_sold_date_sk between 2484022 and 2484052
+or ss_sold_date_sk between 2484387 and 2484417
+or ss_sold_date_sk between 2484752 and 2484782
+or ss_sold_date_sk between 2485117 and 2485147
+or ss_sold_date_sk between 2485483 and 2485513
+or ss_sold_date_sk between 2485848 and 2485878
+or ss_sold_date_sk between 2486213 and 2486243
+or ss_sold_date_sk between 2486578 and 2486608
+or ss_sold_date_sk between 2486944 and 2486974
+or ss_sold_date_sk between 2487309 and 2487339
+or ss_sold_date_sk between 2487674 and 2487704
+or ss_sold_date_sk between 2488039 and 2488069
+)
+group by
+  dt.d_year,
+  item.i_brand,
+  item.i_brand_id
+order by
+  dt.d_year,
+  sum_agg desc,
+  brand_id
+limit 100
+-- end query 3 in stream 0 using template query3.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q34.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q34.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q34.sql
new file mode 100755
index 0000000..d11696e
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q34.sql
@@ -0,0 +1,45 @@
+-- start query 34 in stream 0 using template query34.tpl
+select
+  c_last_name,
+  c_first_name,
+  c_salutation,
+  c_preferred_cust_flag,
+  ss_ticket_number,
+  cnt
+from
+  (select
+    ss_ticket_number,
+    ss_customer_sk,
+    count(*) cnt
+  from
+    store_sales,
+    date_dim,
+    store,
+    household_demographics
+  where
+    store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and (date_dim.d_dom between 1 and 3
+      or date_dim.d_dom between 25 and 28)
+    and (household_demographics.hd_buy_potential = '>10000'
+      or household_demographics.hd_buy_potential = 'Unknown')
+    and household_demographics.hd_vehicle_count > 0
+    and (case when household_demographics.hd_vehicle_count > 0 then 
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count 
else null end) > 1.2
+    and date_dim.d_year in (1998, 1998 + 1, 1998 + 2)
+    and store.s_county in ('Saginaw County', 'Sumner County', 'Appanoose 
County', 'Daviess County', 'Fairfield County', 'Raleigh County', 'Ziebach 
County', 'Williamson County')
+    and ss_sold_date_sk between 2450816 and 2451910 -- partition key filter
+  group by
+    ss_ticket_number,
+    ss_customer_sk
+  ) dn,
+  customer
+where
+  ss_customer_sk = c_customer_sk
+  and cnt between 15 and 20
+order by
+  c_last_name,
+  c_first_name,
+  c_salutation,
+  c_preferred_cust_flag desc
+-- end query 34 in stream 0 using template query34.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q42.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q42.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q42.sql
new file mode 100755
index 0000000..b6332a8
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q42.sql
@@ -0,0 +1,28 @@
+-- start query 42 in stream 0 using template query42.tpl
+select
+  dt.d_year,
+  item.i_category_id,
+  item.i_category,
+  sum(ss_ext_sales_price)
+from
+  date_dim dt,
+  store_sales,
+  item
+where
+  dt.d_date_sk = store_sales.ss_sold_date_sk
+  and store_sales.ss_item_sk = item.i_item_sk
+  and item.i_manager_id = 1
+  and dt.d_moy = 12
+  and dt.d_year = 1998
+  and ss_sold_date_sk between 2451149 and 2451179  -- partition key filter
+group by
+  dt.d_year,
+  item.i_category_id,
+  item.i_category
+order by
+  sum(ss_ext_sales_price) desc,
+  dt.d_year,
+  item.i_category_id,
+  item.i_category
+limit 100
+-- end query 42 in stream 0 using template query42.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q43.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q43.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q43.sql
new file mode 100755
index 0000000..cc2040b
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q43.sql
@@ -0,0 +1,36 @@
+-- start query 43 in stream 0 using template query43.tpl
+select
+  s_store_name,
+  s_store_id,
+  sum(case when (d_day_name = 'Sunday') then ss_sales_price else null end) 
sun_sales,
+  sum(case when (d_day_name = 'Monday') then ss_sales_price else null end) 
mon_sales,
+  sum(case when (d_day_name = 'Tuesday') then ss_sales_price else null end) 
tue_sales,
+  sum(case when (d_day_name = 'Wednesday') then ss_sales_price else null end) 
wed_sales,
+  sum(case when (d_day_name = 'Thursday') then ss_sales_price else null end) 
thu_sales,
+  sum(case when (d_day_name = 'Friday') then ss_sales_price else null end) 
fri_sales,
+  sum(case when (d_day_name = 'Saturday') then ss_sales_price else null end) 
sat_sales
+from
+  date_dim,
+  store_sales,
+  store
+where
+  d_date_sk = ss_sold_date_sk
+  and s_store_sk = ss_store_sk
+  and s_gmt_offset = -5
+  and d_year = 1998
+  and ss_sold_date_sk between 2450816 and 2451179  -- partition key filter
+group by
+  s_store_name,
+  s_store_id
+order by
+  s_store_name,
+  s_store_id,
+  sun_sales,
+  mon_sales,
+  tue_sales,
+  wed_sales,
+  thu_sales,
+  fri_sales,
+  sat_sales
+limit 100
+-- end query 43 in stream 0 using template query43.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q46.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q46.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q46.sql
new file mode 100755
index 0000000..52b7ba4
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q46.sql
@@ -0,0 +1,80 @@
+-- start query 46 in stream 0 using template query46.tpl
+select
+  c_last_name,
+  c_first_name,
+  ca_city,
+  bought_city,
+  ss_ticket_number,
+  amt,
+  profit
+from
+  (select
+    ss_ticket_number,
+    ss_customer_sk,
+    ca_city bought_city,
+    sum(ss_coupon_amt) amt,
+    sum(ss_net_profit) profit
+  from
+    store_sales,
+    date_dim,
+    store,
+    household_demographics,
+    customer_address
+  where
+    store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and store_sales.ss_addr_sk = customer_address.ca_address_sk
+    and (household_demographics.hd_dep_count = 5
+      or household_demographics.hd_vehicle_count = 3)
+    and date_dim.d_dow in (6, 0)
+    and date_dim.d_year in (1999, 1999 + 1, 1999 + 2)
+    and store.s_city in ('Midway', 'Concord', 'Spring Hill', 'Brownsville', 
'Greenville')
+    -- partition key filter
+    and ss_sold_date_sk in (2451181, 2451182, 2451188, 2451189, 2451195, 
2451196, 2451202, 2451203, 2451209, 2451210, 2451216, 2451217, 
+                            2451223, 2451224, 2451230, 2451231, 2451237, 
2451238, 2451244, 2451245, 2451251, 2451252, 2451258, 2451259, 
+                            2451265, 2451266, 2451272, 2451273, 2451279, 
2451280, 2451286, 2451287, 2451293, 2451294, 2451300, 2451301, 
+                            2451307, 2451308, 2451314, 2451315, 2451321, 
2451322, 2451328, 2451329, 2451335, 2451336, 2451342, 2451343, 
+                            2451349, 2451350, 2451356, 2451357, 2451363, 
2451364, 2451370, 2451371, 2451377, 2451378, 2451384, 2451385, 
+                            2451391, 2451392, 2451398, 2451399, 2451405, 
2451406, 2451412, 2451413, 2451419, 2451420, 2451426, 2451427, 
+                            2451433, 2451434, 2451440, 2451441, 2451447, 
2451448, 2451454, 2451455, 2451461, 2451462, 2451468, 2451469, 
+                            2451475, 2451476, 2451482, 2451483, 2451489, 
2451490, 2451496, 2451497, 2451503, 2451504, 2451510, 2451511, 
+                            2451517, 2451518, 2451524, 2451525, 2451531, 
2451532, 2451538, 2451539, 2451545, 2451546, 2451552, 2451553, 
+                            2451559, 2451560, 2451566, 2451567, 2451573, 
2451574, 2451580, 2451581, 2451587, 2451588, 2451594, 2451595, 
+                            2451601, 2451602, 2451608, 2451609, 2451615, 
2451616, 2451622, 2451623, 2451629, 2451630, 2451636, 2451637, 
+                            2451643, 2451644, 2451650, 2451651, 2451657, 
2451658, 2451664, 2451665, 2451671, 2451672, 2451678, 2451679, 
+                            2451685, 2451686, 2451692, 2451693, 2451699, 
2451700, 2451706, 2451707, 2451713, 2451714, 2451720, 2451721, 
+                            2451727, 2451728, 2451734, 2451735, 2451741, 
2451742, 2451748, 2451749, 2451755, 2451756, 2451762, 2451763, 
+                            2451769, 2451770, 2451776, 2451777, 2451783, 
2451784, 2451790, 2451791, 2451797, 2451798, 2451804, 2451805, 
+                            2451811, 2451812, 2451818, 2451819, 2451825, 
2451826, 2451832, 2451833, 2451839, 2451840, 2451846, 2451847, 
+                            2451853, 2451854, 2451860, 2451861, 2451867, 
2451868, 2451874, 2451875, 2451881, 2451882, 2451888, 2451889, 
+                            2451895, 2451896, 2451902, 2451903, 2451909, 
2451910, 2451916, 2451917, 2451923, 2451924, 2451930, 2451931, 
+                            2451937, 2451938, 2451944, 2451945, 2451951, 
2451952, 2451958, 2451959, 2451965, 2451966, 2451972, 2451973, 
+                            2451979, 2451980, 2451986, 2451987, 2451993, 
2451994, 2452000, 2452001, 2452007, 2452008, 2452014, 2452015, 
+                            2452021, 2452022, 2452028, 2452029, 2452035, 
2452036, 2452042, 2452043, 2452049, 2452050, 2452056, 2452057, 
+                            2452063, 2452064, 2452070, 2452071, 2452077, 
2452078, 2452084, 2452085, 2452091, 2452092, 2452098, 2452099, 
+                            2452105, 2452106, 2452112, 2452113, 2452119, 
2452120, 2452126, 2452127, 2452133, 2452134, 2452140, 2452141, 
+                            2452147, 2452148, 2452154, 2452155, 2452161, 
2452162, 2452168, 2452169, 2452175, 2452176, 2452182, 2452183, 
+                            2452189, 2452190, 2452196, 2452197, 2452203, 
2452204, 2452210, 2452211, 2452217, 2452218, 2452224, 2452225, 
+                            2452231, 2452232, 2452238, 2452239, 2452245, 
2452246, 2452252, 2452253, 2452259, 2452260, 2452266, 2452267, 
+                            2452273, 2452274)
+  group by
+    ss_ticket_number,
+    ss_customer_sk,
+    ss_addr_sk,
+    ca_city
+  ) dn,
+  customer,
+  customer_address current_addr
+where
+  ss_customer_sk = c_customer_sk
+  and customer.c_current_addr_sk = current_addr.ca_address_sk
+  and current_addr.ca_city <> bought_city
+order by
+  c_last_name,
+  c_first_name,
+  ca_city,
+  bought_city,
+  ss_ticket_number
+limit 100
+-- end query 46 in stream 0 using template query46.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q52.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q52.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q52.sql
new file mode 100755
index 0000000..a510eef
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q52.sql
@@ -0,0 +1,27 @@
+-- start query 52 in stream 0 using template query52.tpl
+select
+  dt.d_year,
+  item.i_brand_id brand_id,
+  item.i_brand brand,
+  sum(ss_ext_sales_price) ext_price
+from
+  date_dim dt,
+  store_sales,
+  item
+where
+  dt.d_date_sk = store_sales.ss_sold_date_sk
+  and store_sales.ss_item_sk = item.i_item_sk
+  and item.i_manager_id = 1
+  and dt.d_moy = 12
+  and dt.d_year = 1998
+  and ss_sold_date_sk between 2451149 and 2451179 -- added for partition 
pruning
+group by
+  dt.d_year,
+  item.i_brand,
+  item.i_brand_id
+order by
+  dt.d_year,
+  ext_price desc,
+  brand_id
+limit 100
+-- end query 52 in stream 0 using template query52.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q53.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q53.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q53.sql
new file mode 100755
index 0000000..fb7bb75
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q53.sql
@@ -0,0 +1,37 @@
+-- start query 53 in stream 0 using template query53.tpl
+select
+  *
+from
+  (select
+    i_manufact_id,
+    sum(ss_sales_price) sum_sales,
+    avg(sum(ss_sales_price)) over (partition by i_manufact_id) 
avg_quarterly_sales
+  from
+    item,
+    store_sales,
+    date_dim,
+    store
+  where
+    ss_item_sk = i_item_sk
+    and ss_sold_date_sk = d_date_sk
+    and ss_store_sk = s_store_sk
+    and d_month_seq in (1212, 1212 + 1, 1212 + 2, 1212 + 3, 1212 + 4, 1212 + 
5, 1212 + 6, 1212 + 7, 1212 + 8, 1212 + 9, 1212 + 10, 1212 + 11)
+    and ((i_category in ('Books', 'Children', 'Electronics')
+      and i_class in ('personal', 'portable', 'reference', 'self-help')
+      and i_brand in ('scholaramalgamalg #14', 'scholaramalgamalg #7', 
'exportiunivamalg #9', 'scholaramalgamalg #9'))
+    or (i_category in ('Women', 'Music', 'Men')
+      and i_class in ('accessories', 'classical', 'fragrances', 'pants')
+      and i_brand in ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto 
#1', 'importoamalg #1')))
+    and ss_sold_date_sk between 2451911 and 2452275 -- partition key filter
+  group by
+    i_manufact_id,
+    d_qoy
+  ) tmp1
+where
+  case when avg_quarterly_sales > 0 then abs (sum_sales - avg_quarterly_sales) 
/ avg_quarterly_sales else null end > 0.1
+order by
+  avg_quarterly_sales,
+  sum_sales,
+  i_manufact_id
+limit 100
+-- end query 53 in stream 0 using template query53.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q55.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q55.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q55.sql
new file mode 100755
index 0000000..47b1f02
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q55.sql
@@ -0,0 +1,24 @@
+-- start query 55 in stream 0 using template query55.tpl
+select
+  i_brand_id brand_id,
+  i_brand brand,
+  sum(ss_ext_sales_price) ext_price
+from
+  date_dim,
+  store_sales,
+  item
+where
+  d_date_sk = ss_sold_date_sk
+  and ss_item_sk = i_item_sk
+  and i_manager_id = 48
+  and d_moy = 11
+  and d_year = 2001
+  and ss_sold_date_sk between 2452215 and 2452244
+group by
+  i_brand,
+  i_brand_id
+order by
+  ext_price desc,
+  i_brand_id
+limit 100
+-- end query 55 in stream 0 using template query55.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q59.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q59.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q59.sql
new file mode 100755
index 0000000..3d5c4e9
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q59.sql
@@ -0,0 +1,83 @@
+-- start query 59 in stream 0 using template query59.tpl
+with
+  wss as
+  (select
+    d_week_seq,
+    ss_store_sk,
+    sum(case when (d_day_name = 'Sunday') then ss_sales_price else null end) 
sun_sales,
+    sum(case when (d_day_name = 'Monday') then ss_sales_price else null end) 
mon_sales,
+    sum(case when (d_day_name = 'Tuesday') then ss_sales_price else null end) 
tue_sales,
+    sum(case when (d_day_name = 'Wednesday') then ss_sales_price else null 
end) wed_sales,
+    sum(case when (d_day_name = 'Thursday') then ss_sales_price else null end) 
thu_sales,
+    sum(case when (d_day_name = 'Friday') then ss_sales_price else null end) 
fri_sales,
+    sum(case when (d_day_name = 'Saturday') then ss_sales_price else null end) 
sat_sales
+  from
+    store_sales,
+    date_dim
+  where
+    d_date_sk = ss_sold_date_sk
+  group by
+    d_week_seq,
+    ss_store_sk
+  )
+select
+  s_store_name1,
+  s_store_id1,
+  d_week_seq1,
+  sun_sales1 / sun_sales2,
+  mon_sales1 / mon_sales2,
+  tue_sales1 / tue_sales1,
+  wed_sales1 / wed_sales2,
+  thu_sales1 / thu_sales2,
+  fri_sales1 / fri_sales2,
+  sat_sales1 / sat_sales2
+from
+  (select
+    s_store_name s_store_name1,
+    wss.d_week_seq d_week_seq1,
+    s_store_id s_store_id1,
+    sun_sales sun_sales1,
+    mon_sales mon_sales1,
+    tue_sales tue_sales1,
+    wed_sales wed_sales1,
+    thu_sales thu_sales1,
+    fri_sales fri_sales1,
+    sat_sales sat_sales1
+  from
+    wss,
+    store,
+    date_dim d
+  where
+    d.d_week_seq = wss.d_week_seq
+    and ss_store_sk = s_store_sk
+    and d_month_seq between 1185 and 1185 + 11
+  ) y,
+  (select
+    s_store_name s_store_name2,
+    wss.d_week_seq d_week_seq2,
+    s_store_id s_store_id2,
+    sun_sales sun_sales2,
+    mon_sales mon_sales2,
+    tue_sales tue_sales2,
+    wed_sales wed_sales2,
+    thu_sales thu_sales2,
+    fri_sales fri_sales2,
+    sat_sales sat_sales2
+  from
+    wss,
+    store,
+    date_dim d
+  where
+    d.d_week_seq = wss.d_week_seq
+    and ss_store_sk = s_store_sk
+    and d_month_seq between 1185 + 12 and 1185 + 23
+  ) x
+where
+  s_store_id1 = s_store_id2
+  and d_week_seq1 = d_week_seq2 - 52
+order by
+  s_store_name1,
+  s_store_id1,
+  d_week_seq1
+limit 100
+-- end query 59 in stream 0 using template query59.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q63.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q63.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q63.sql
new file mode 100755
index 0000000..b71199a
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q63.sql
@@ -0,0 +1,29 @@
+-- start query 63 in stream 0 using template query63.tpl
+select  * 
+from (select i_manager_id
+             ,sum(ss_sales_price) sum_sales
+             ,avg(sum(ss_sales_price)) over (partition by i_manager_id) 
avg_monthly_sales
+      from item
+          ,store_sales
+          ,date_dim
+          ,store
+      where ss_item_sk = i_item_sk
+        and ss_sold_date_sk = d_date_sk
+       and ss_sold_date_sk between 2452123 and 2452487
+        and ss_store_sk = s_store_sk
+        and d_month_seq in 
(1219,1219+1,1219+2,1219+3,1219+4,1219+5,1219+6,1219+7,1219+8,1219+9,1219+10,1219+11)
+        and ((    i_category in ('Books','Children','Electronics')
+              and i_class in ('personal','portable','reference','self-help')
+              and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7',
+                                 'exportiunivamalg #9','scholaramalgamalg #9'))
+           or(    i_category in ('Women','Music','Men')
+              and i_class in ('accessories','classical','fragrances','pants')
+              and i_brand in ('amalgimporto #1','edu packscholar 
#1','exportiimporto #1',
+                                'importoamalg #1')))
+group by i_manager_id, d_moy) tmp1
+where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) 
/ avg_monthly_sales else null end > 0.1
+order by i_manager_id
+        ,avg_monthly_sales
+        ,sum_sales
+limit 100
+-- end query 63 in stream 0 using template query63.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q65.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q65.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q65.sql
new file mode 100755
index 0000000..7344fee
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q65.sql
@@ -0,0 +1,58 @@
+-- start query 65 in stream 0 using template query65.tpl
+select
+  s_store_name,
+  i_item_desc,
+  sc.revenue,
+  i_current_price,
+  i_wholesale_cost,
+  i_brand
+from
+  store,
+  item,
+  (select
+    ss_store_sk,
+    avg(revenue) as ave
+  from
+    (select
+      ss_store_sk,
+      ss_item_sk,
+      sum(ss_sales_price) as revenue
+    from
+      store_sales,
+      date_dim
+    where
+      ss_sold_date_sk = d_date_sk
+      and d_month_seq between 1212 and 1212 + 11
+      and ss_sold_date_sk between 2451911 and 2452275  -- partition key filter
+    group by
+      ss_store_sk,
+      ss_item_sk
+    ) sa
+  group by
+    ss_store_sk
+  ) sb,
+  (select
+    ss_store_sk,
+    ss_item_sk,
+    sum(ss_sales_price) as revenue
+  from
+    store_sales,
+    date_dim
+  where
+    ss_sold_date_sk = d_date_sk
+    and d_month_seq between 1212 and 1212 + 11
+    and ss_sold_date_sk between 2451911 and 2452275  -- partition key filter
+  group by
+    ss_store_sk,
+    ss_item_sk
+  ) sc
+where
+  sb.ss_store_sk = sc.ss_store_sk
+  and sc.revenue <= 0.1 * sb.ave
+  and s_store_sk = sc.ss_store_sk
+  and i_item_sk = sc.ss_item_sk
+order by
+  s_store_name,
+  i_item_desc
+limit 100
+-- end query 65 in stream 0 using template query65.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q68.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q68.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q68.sql
new file mode 100755
index 0000000..94df4b3
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q68.sql
@@ -0,0 +1,62 @@
+-- start query 68 in stream 0 using template query68.tpl
+-- changed to match exact same partitions in original query
+select
+  c_last_name,
+  c_first_name,
+  ca_city,
+  bought_city,
+  ss_ticket_number,
+  extended_price,
+  extended_tax,
+  list_price
+from
+  (select
+    ss_ticket_number,
+    ss_customer_sk,
+    ca_city bought_city,
+    sum(ss_ext_sales_price) extended_price,
+    sum(ss_ext_list_price) list_price,
+    sum(ss_ext_tax) extended_tax
+  from
+    store_sales,
+    date_dim,
+    store,
+    household_demographics,
+    customer_address
+  where
+    store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and store_sales.ss_addr_sk = customer_address.ca_address_sk
+    and date_dim.d_dom between 1 and 2
+    and (household_demographics.hd_dep_count = 5
+      or household_demographics.hd_vehicle_count = 3)
+    and date_dim.d_year in (1999, 1999 + 1, 1999 + 2)
+    and store.s_city in ('Midway', 'Fairview')
+    -- partition key filter
+    and ss_sold_date_sk in (2451180, 2451181, 2451211, 2451212, 2451239, 
2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 
+                             2451332, 2451361, 2451362, 2451392, 2451393, 
2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 
+                             2451514, 2451515, 2451545, 2451546, 2451576, 
2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 
+                             2451667, 2451697, 2451698, 2451727, 2451728, 
2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 
+                             2451850, 2451851, 2451880, 2451881, 2451911, 
2451912, 2451942, 2451943, 2451970, 2451971, 2452001, 
+                             2452002, 2452031, 2452032, 2452062, 2452063, 
2452092, 2452093, 2452123, 2452124, 2452154, 2452155, 
+                             2452184, 2452185, 2452215, 2452216, 2452245, 
2452246) 
+    --and ss_sold_date_sk between 2451180 and 2451269 -- partition key filter 
(3 months)
+    --and d_date between '1999-01-01' and '1999-03-31'
+  group by
+    ss_ticket_number,
+    ss_customer_sk,
+    ss_addr_sk,
+    ca_city
+  ) dn,
+  customer,
+  customer_address current_addr
+where
+  ss_customer_sk = c_customer_sk
+  and customer.c_current_addr_sk = current_addr.ca_address_sk
+  and current_addr.ca_city <> bought_city
+order by
+  c_last_name,
+  ss_ticket_number
+limit 100
+-- end query 68 in stream 0 using template query68.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q7.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q7.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q7.sql
new file mode 100755
index 0000000..c61a2d0
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q7.sql
@@ -0,0 +1,31 @@
+-- start query 7 in stream 0 using template query7.tpl
+select
+  i_item_id,
+  avg(ss_quantity) agg1,
+  avg(ss_list_price) agg2,
+  avg(ss_coupon_amt) agg3,
+  avg(ss_sales_price) agg4
+from
+  store_sales,
+  customer_demographics,
+  date_dim,
+  item,
+  promotion
+where
+  ss_sold_date_sk = d_date_sk
+  and ss_item_sk = i_item_sk
+  and ss_cdemo_sk = cd_demo_sk
+  and ss_promo_sk = p_promo_sk
+  and cd_gender = 'F'
+  and cd_marital_status = 'W'
+  and cd_education_status = 'Primary'
+  and (p_channel_email = 'N'
+    or p_channel_event = 'N')
+  and d_year = 1998
+  and ss_sold_date_sk between 2450815 and 2451179 -- partition key filter
+group by
+  i_item_id
+order by
+  i_item_id
+limit 100
+-- end query 7 in stream 0 using template query7.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q73.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q73.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q73.sql
new file mode 100755
index 0000000..8703910
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q73.sql
@@ -0,0 +1,49 @@
+-- start query 73 in stream 0 using template query73.tpl
+select
+  c_last_name,
+  c_first_name,
+  c_salutation,
+  c_preferred_cust_flag,
+  ss_ticket_number,
+  cnt
+from
+  (select
+    ss_ticket_number,
+    ss_customer_sk,
+    count(*) cnt
+  from
+    store_sales,
+    date_dim,
+    store,
+    household_demographics
+  where
+    store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and date_dim.d_dom between 1 and 2
+    and (household_demographics.hd_buy_potential = '>10000'
+      or household_demographics.hd_buy_potential = 'Unknown')
+    and household_demographics.hd_vehicle_count > 0
+    and case when household_demographics.hd_vehicle_count > 0 then 
household_demographics.hd_dep_count / household_demographics.hd_vehicle_count 
else null end > 1
+    and date_dim.d_year in (1998, 1998 + 1, 1998 + 2)
+    and store.s_county in ('Fairfield County','Ziebach County','Bronx 
County','Barrow County')
+    -- partition key filter
+    and ss_sold_date_sk in (2450815, 2450816, 2450846, 2450847, 2450874, 
2450875, 2450905, 2450906, 2450935, 2450936, 2450966, 2450967, 
+                            2450996, 2450997, 2451027, 2451028, 2451058, 
2451059, 2451088, 2451089, 2451119, 2451120, 2451149, 
+                            2451150, 2451180, 2451181, 2451211, 2451212, 
2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 
+                            2451331, 2451332, 2451361, 2451362, 2451392, 
2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 
+                            2451485, 2451514, 2451515, 2451545, 2451546, 
2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 
+                            2451666, 2451667, 2451697, 2451698, 2451727, 
2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 
+                            2451820, 2451850, 2451851, 2451880, 2451881)    
+    --and ss_sold_date_sk between 2451180 and 2451269 -- partition key filter 
(3 months)
+  group by
+    ss_ticket_number,
+    ss_customer_sk
+  ) dj,
+  customer
+where
+  ss_customer_sk = c_customer_sk
+  and cnt between 1 and 5
+order by
+  cnt desc
+-- end query 73 in stream 0 using template query73.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q79.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q79.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q79.sql
new file mode 100755
index 0000000..4254310
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q79.sql
@@ -0,0 +1,59 @@
+-- start query 79 in stream 0 using template query79.tpl
+select
+  c_last_name,
+  c_first_name,
+  substr(s_city, 1, 30),
+  ss_ticket_number,
+  amt,
+  profit
+from
+  (select
+    ss_ticket_number,
+    ss_customer_sk,
+    store.s_city,
+    sum(ss_coupon_amt) amt,
+    sum(ss_net_profit) profit
+  from
+    store_sales,
+    date_dim,
+    store,
+    household_demographics
+  where
+    store_sales.ss_sold_date_sk = date_dim.d_date_sk
+    and store_sales.ss_store_sk = store.s_store_sk
+    and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+    and (household_demographics.hd_dep_count = 8
+      or household_demographics.hd_vehicle_count > 0)
+    and date_dim.d_dow = 1
+     and date_dim.d_year in (1998, 1998 + 1, 1998 + 2)
+    and store.s_number_employees between 200 and 295
+    and ss_sold_date_sk between 2450819 and 2451904
+    -- partition key filter
+    --and ss_sold_date_sk in (2450819, 2450826, 2450833, 2450840, 2450847, 
2450854, 2450861, 2450868, 2450875, 2450882, 2450889,
+    -- 2450896, 2450903, 2450910, 2450917, 2450924, 2450931, 2450938, 2450945, 
2450952, 2450959, 2450966, 2450973, 2450980, 2450987,
+    -- 2450994, 2451001, 2451008, 2451015, 2451022, 2451029, 2451036, 2451043, 
2451050, 2451057, 2451064, 2451071, 2451078, 2451085,
+    -- 2451092, 2451099, 2451106, 2451113, 2451120, 2451127, 2451134, 2451141, 
2451148, 2451155, 2451162, 2451169, 2451176, 2451183,
+    -- 2451190, 2451197, 2451204, 2451211, 2451218, 2451225, 2451232, 2451239, 
2451246, 2451253, 2451260, 2451267, 2451274, 2451281,
+    -- 2451288, 2451295, 2451302, 2451309, 2451316, 2451323, 2451330, 2451337, 
2451344, 2451351, 2451358, 2451365, 2451372, 2451379,
+    -- 2451386, 2451393, 2451400, 2451407, 2451414, 2451421, 2451428, 2451435, 
2451442, 2451449, 2451456, 2451463, 2451470, 2451477,
+    -- 2451484, 2451491, 2451498, 2451505, 2451512, 2451519, 2451526, 2451533, 
2451540, 2451547, 2451554, 2451561, 2451568, 2451575,
+    -- 2451582, 2451589, 2451596, 2451603, 2451610, 2451617, 2451624, 2451631, 
2451638, 2451645, 2451652, 2451659, 2451666, 2451673,
+    -- 2451680, 2451687, 2451694, 2451701, 2451708, 2451715, 2451722, 2451729, 
2451736, 2451743, 2451750, 2451757, 2451764, 2451771,
+    -- 2451778, 2451785, 2451792, 2451799, 2451806, 2451813, 2451820, 2451827, 
2451834, 2451841, 2451848, 2451855, 2451862, 2451869,
+    -- 2451876, 2451883, 2451890, 2451897, 2451904)    
+  group by
+    ss_ticket_number,
+    ss_customer_sk,
+    ss_addr_sk,
+    store.s_city
+  ) ms,
+  customer
+where
+  ss_customer_sk = c_customer_sk
+order by
+  c_last_name,
+  c_first_name,
+  substr(s_city, 1, 30),
+  profit 
+  limit 100
+-- end query 79 in stream 0 using template query79.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q89.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q89.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q89.sql
new file mode 100755
index 0000000..b1d814a
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q89.sql
@@ -0,0 +1,43 @@
+-- start query 89 in stream 0 using template query89.tpl
+select
+  *
+from
+  (select
+    i_category,
+    i_class,
+    i_brand,
+    s_store_name,
+    s_company_name,
+    d_moy,
+    sum(ss_sales_price) sum_sales,
+    avg(sum(ss_sales_price)) over (partition by i_category, i_brand, 
s_store_name, s_company_name) avg_monthly_sales
+  from
+    item,
+    store_sales,
+    date_dim,
+    store
+  where
+    ss_item_sk = i_item_sk
+    and ss_sold_date_sk = d_date_sk
+    and ss_store_sk = s_store_sk
+    and d_year in (2000)
+    and ((i_category in ('Home', 'Books', 'Electronics')
+        and i_class in ('wallpaper', 'parenting', 'musical'))
+      or (i_category in ('Shoes', 'Jewelry', 'Men')
+        and i_class in ('womens', 'birdal', 'pants')))
+    and ss_sold_date_sk between 2451545 and 2451910  -- partition key filter
+  group by
+    i_category,
+    i_class,
+    i_brand,
+    s_store_name,
+    s_company_name,
+    d_moy
+  ) tmp1
+where
+  case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) 
/ avg_monthly_sales) else null end > 0.1
+order by
+  sum_sales - avg_monthly_sales,
+  s_store_name
+limit 100
+-- end query 89 in stream 0 using template query89.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q98.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q98.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/q98.sql
new file mode 100755
index 0000000..f53f2f5
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q98.sql
@@ -0,0 +1,32 @@
+-- start query 98 in stream 0 using template query98.tpl
+select
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price,
+  sum(ss_ext_sales_price) as itemrevenue,
+  sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) over (partition 
by i_class) as revenueratio
+from
+  store_sales,
+  item,
+  date_dim
+where
+  ss_item_sk = i_item_sk
+  and i_category in ('Jewelry', 'Sports', 'Books')
+  and ss_sold_date_sk = d_date_sk
+  and ss_sold_date_sk between 2451911 and 2451941  -- partition key filter (1 
calendar month)
+  and d_date between '2001-01-01' and '2001-01-31'
+group by
+  i_item_id,
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price
+order by
+  i_category,
+  i_class,
+  i_item_id,
+  i_item_desc,
+  revenueratio
+--limit 1000; -- added limit
+-- end query 98 in stream 0 using template query98.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/ss_max.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/ss_max.sql 
b/sql/core/src/test/resources/tpcds-modifiedQueries/ss_max.sql
new file mode 100755
index 0000000..bf58b4b
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/ss_max.sql
@@ -0,0 +1,14 @@
+select 
+  count(*) as total,
+  count(ss_sold_date_sk) as not_null_total,
+  count(distinct ss_sold_date_sk) as unique_days,
+  max(ss_sold_date_sk) as max_ss_sold_date_sk,
+  max(ss_sold_time_sk) as max_ss_sold_time_sk,
+  max(ss_item_sk) as max_ss_item_sk,
+  max(ss_customer_sk) as max_ss_customer_sk,
+  max(ss_cdemo_sk) as max_ss_cdemo_sk,
+  max(ss_hdemo_sk) as max_ss_hdemo_sk,
+  max(ss_addr_sk) as max_ss_addr_sk,
+  max(ss_store_sk) as max_ss_store_sk,
+  max(ss_promo_sk) as max_ss_promo_sk
+from store_sales

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
index c0797fa..e47d4b0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala
@@ -22,9 +22,18 @@ import org.scalatest.BeforeAndAfterAll
 import org.apache.spark.sql.catalyst.util.resourceToString
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.util.Utils
 
+/**
+ * This test suite ensures all the TPC-DS queries can be successfully analyzed 
and optimized
+ * without hitting the max iteration threshold.
+ */
 class TPCDSQuerySuite extends QueryTest with SharedSQLContext with 
BeforeAndAfterAll {
 
+  // When Utils.isTesting is true, the RuleExecutor will issue an exception 
when hitting
+  // the max iteration of analyzer/optimizer batches.
+  assert(Utils.isTesting, "spark.testing is not set to true")
+
   /**
    * Drop all the tables
    */
@@ -341,8 +350,23 @@ class TPCDSQuerySuite extends QueryTest with 
SharedSQLContext with BeforeAndAfte
       classLoader = Thread.currentThread().getContextClassLoader)
     test(name) {
       withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") {
-        sql(queryString).collect()
+        // Just check the plans can be properly generated
+        sql(queryString).queryExecution.executedPlan
       }
     }
   }
+
+  // These queries are from 
https://github.com/cloudera/impala-tpcds-kit/tree/master/queries
+  val modifiedTPCDSQueries = Seq(
+    "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", 
"q55", "q59",
+    "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max")
+
+  modifiedTPCDSQueries.foreach { name =>
+    val queryString = resourceToString(s"tpcds-modifiedQueries/$name.sql",
+      classLoader = Thread.currentThread().getContextClassLoader)
+    test(s"modified-$name") {
+      // Just check the plans can be properly generated
+      sql(queryString).queryExecution.executedPlan
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to