[
https://issues.apache.org/jira/browse/HIVE-8315?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14161506#comment-14161506
]
Mostafa Mokhtar commented on HIVE-8315:
---------------------------------------
[~rhbutani]
The correct plan is not getting generated, most likely because HIVE-8280 is not
addressed yet.
item x store_sales should match the PK/FK pattern and currently PK returns
selectivity of 1 due to issue mentioned in HIVE-8280.
{code}
2014-10-07 01:17:16,747 DEBUG [main]: parse.SemanticAnalyzer
(SemanticAnalyzer.java:apply(12316)) - HiveSortRel(sort0=[$0], sort1=[$1],
sort2=[$20], dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount =
424827.22498719255, cumulative cost = {6.678648133048896E9 rows,
424827.22498719255 cpu, 0.0 io}, id = 1311
HiveProjectRel(product_name=[$0], store_name=[$2], store_zip=[$3],
b_street_number=[$4], b_streen_name=[$5], b_city=[$6], b_zip=[$7],
c_street_number=[$8], c_street_name=[$9], c_city=[$10], c_zip=[$11],
syear=[$12], cnt=[$13], s1=[$14], s2=[$15], s3=[$16], s11=[$22], s21=[$23],
s31=[$24], syear1=[$20], cnt1=[$21]): rowcount = 424827.22498719255, cumulative
cost = {6.216184283941706E9 rows, 0.0 cpu, 0.0 io}, id = 1309
HiveFilterRel(condition=[<=($21, $13)]): rowcount = 424827.22498719255,
cumulative cost = {6.216184283941706E9 rows, 0.0 cpu, 0.0 io}, id = 1307
HiveProjectRel(product_name=[$0], item_sk=[$1], store_name=[$2],
store_zip=[$3], b_street_number=[$4], b_streen_name=[$5], b_city=[$6],
b_zip=[$7], c_street_number=[$8], c_street_name=[$9], c_city=[$10],
c_zip=[$11], syear=[$12], cnt=[$13], s1=[$14], s2=[$15], s3=[$16],
item_sk0=[$17], store_name0=[$18], store_zip0=[$19], syear0=[$20], cnt0=[$21],
s10=[$22], s20=[$23], s30=[$24]): rowcount = 1274481.6749615776, cumulative
cost = {6.216184283941706E9 rows, 0.0 cpu, 0.0 io}, id = 3711
HiveJoinRel(condition=[AND(AND(=($1, $17), =($2, $18)), =($3, $19))],
joinType=[inner]): rowcount = 1274481.6749615776, cumulative cost =
{6.216184283941706E9 rows, 0.0 cpu, 0.0 io}, id = 3706
HiveProjectRel(product_name=[$0], item_sk=[$1], store_name=[$2],
store_zip=[$3], b_street_number=[$4], b_streen_name=[$5], b_city=[$6],
b_zip=[$7], c_street_number=[$8], c_street_name=[$9], c_city=[$10],
c_zip=[$11], syear=[$12], cnt=[$15], s1=[$16], s2=[$17], s3=[$18]): rowcount =
800000.0, cumulative cost = {6.214584283941706E9 rows, 0.0 cpu, 0.0 io}, id =
1209
HiveAggregateRel(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14}], agg#0=[count()], agg#1=[sum($15)], agg#2=[sum($16)],
agg#3=[sum($17)]): rowcount = 800000.0, cumulative cost = {6.214584283941706E9
rows, 0.0 cpu, 0.0 io}, id = 1207
HiveProjectRel($f0=[$53], $f1=[$50], $f2=[$27], $f3=[$28],
$f4=[$39], $f5=[$40], $f6=[$41], $f7=[$42], $f8=[$44], $f9=[$45], $f10=[$46],
$f11=[$47], $f12=[$21], $f13=[$23], $f14=[$25], $f15=[$8], $f16=[$9],
$f17=[$10]): rowcount = 3.1094552702569574E8, cumulative cost =
{6.214584283941706E9 rows, 0.0 cpu, 0.0 io}, id = 1205
HiveProjectRel(ss_item_sk=[$0], ss_customer_sk=[$1],
ss_cdemo_sk=[$2], ss_hdemo_sk=[$3], ss_addr_sk=[$4], ss_store_sk=[$5],
ss_promo_sk=[$6], ss_ticket_number=[$7], ss_wholesale_cost=[$8],
ss_list_price=[$9], ss_coupon_amt=[$10], ss_sold_date_sk=[$11],
sr_item_sk=[$12], sr_ticket_number=[$13], c_customer_sk=[$14],
c_current_cdemo_sk=[$15], c_current_hdemo_sk=[$16], c_current_addr_sk=[$17],
c_first_shipto_date_sk=[$18], c_first_sales_date_sk=[$19], d_date_sk=[$20],
d_year=[$21], d_date_sk0=[$22], d_year0=[$23], d_date_sk1=[$24], d_year1=[$25],
s_store_sk=[$26], s_store_name=[$27], s_zip=[$28], cd_demo_sk=[$29],
cd_marital_status=[$30], cd_demo_sk0=[$31], cd_marital_status0=[$32],
p_promo_sk=[$37], hd_demo_sk=[$33], hd_income_band_sk=[$34], hd_demo_sk0=[$38],
hd_income_band_sk0=[$39], ca_address_sk=[$45], ca_street_number=[$46],
ca_street_name=[$47], ca_city=[$48], ca_zip=[$49], ca_address_sk0=[$50],
ca_street_number0=[$51], ca_street_name0=[$52], ca_city0=[$53], ca_zip0=[$54],
ib_income_band_sk=[$35], ib_income_band_sk0=[$40], i_item_sk=[$41],
i_current_price=[$42], i_color=[$43], i_product_name=[$44], cs_item_sk=[$36]):
rowcount = 3.1094552702569574E8, cumulative cost = {6.214584283941706E9 rows,
0.0 cpu, 0.0 io}, id = 3704
HiveJoinRel(condition=[=($17, $50)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {6.214584283941706E9 rows,
0.0 cpu, 0.0 io}, id = 3702
HiveJoinRel(condition=[=($4, $45)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {5.90283875691601E9 rows,
0.0 cpu, 0.0 io}, id = 3700
HiveJoinRel(condition=[=($0, $41)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {5.591093229890314E9 rows,
0.0 cpu, 0.0 io}, id = 3698
HiveJoinRel(condition=[=($16, $38)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {5.280147701864618E9 rows,
0.0 cpu, 0.0 io}, id = 3696
HiveJoinRel(condition=[=($6, $37)],
joinType=[inner]): rowcount = 4.015947855294367E8, cumulative cost =
{4.878545716335181E9 rows, 0.0 cpu, 0.0 io}, id = 3693
HiveJoinRel(condition=[=($0, $36)],
joinType=[inner]): rowcount = 4.015947855294367E8, cumulative cost =
{4.476950480805744E9 rows, 0.0 cpu, 0.0 io}, id = 3691
HiveJoinRel(condition=[=($3, $33)],
joinType=[inner]): rowcount = 4.015947855294367E8, cumulative cost =
{4.0753556942763076E9 rows, 0.0 cpu, 0.0 io}, id = 3689
HiveFilterRel(condition=[<>($30, $32)]):
rowcount = 5.186708209219766E8, cumulative cost = {3.556677673354331E9 rows,
0.0 cpu, 0.0 io}, id = 1153
HiveProjectRel(ss_item_sk=[$0],
ss_customer_sk=[$1], ss_cdemo_sk=[$2], ss_hdemo_sk=[$3], ss_addr_sk=[$4],
ss_store_sk=[$5], ss_promo_sk=[$6], ss_ticket_number=[$7],
ss_wholesale_cost=[$8], ss_list_price=[$9], ss_coupon_amt=[$10],
ss_sold_date_sk=[$11], sr_item_sk=[$31], sr_ticket_number=[$32],
c_customer_sk=[$21], c_current_cdemo_sk=[$22], c_current_hdemo_sk=[$23],
c_current_addr_sk=[$24], c_first_shipto_date_sk=[$25],
c_first_sales_date_sk=[$26], d_date_sk=[$14], d_year=[$15], d_date_sk0=[$27],
d_year0=[$28], d_date_sk1=[$29], d_year1=[$30], s_store_sk=[$16],
s_store_name=[$17], s_zip=[$18], cd_demo_sk=[$12], cd_marital_status=[$13],
cd_demo_sk0=[$19], cd_marital_status0=[$20]): rowcount = 3.6306957464538364E9,
cumulative cost = {3.556677673354331E9 rows, 0.0 cpu, 0.0 io}, id = 2300
HiveJoinRel(condition=[AND(=($0, $31),
=($7, $32))], joinType=[inner]): rowcount = 3.6306957464538364E9, cumulative
cost = {3.556677673354331E9 rows, 0.0 cpu, 0.0 io}, id = 2298
HiveJoinRel(condition=[=($1, $21)],
joinType=[inner]): rowcount = 1.297271788131117E9, cumulative cost =
{2.203827880223214E9 rows, 0.0 cpu, 0.0 io}, id = 2296
HiveJoinRel(condition=[=($5, $16)],
joinType=[inner]): rowcount = 5.50076554E8, cumulative cost =
{1.6521513262232141E9 rows, 0.0 cpu, 0.0 io}, id = 2289
HiveJoinRel(condition=[=($11, $14)],
joinType=[inner]): rowcount = 5.50076554E8, cumulative cost =
{1.1020745602232141E9 rows, 0.0 cpu, 0.0 io}, id = 2287
HiveJoinRel(condition=[=($2, $12)],
joinType=[inner]): rowcount = 5.50076554E8, cumulative cost = {5.51997354E8
rows, 0.0 cpu, 0.0 io}, id = 1877
HiveProjectRel(ss_item_sk=[$1],
ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5],
ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8],
ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$18],
ss_sold_date_sk=[$22]): rowcount = 5.50076554E8, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 909
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store_sales]]):
rowcount = 5.50076554E8, cumulative cost = {0}, id = 55
HiveProjectRel(cd_demo_sk=[$0],
cd_marital_status=[$2]): rowcount = 1920800.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1239
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_demographics]]):
rowcount = 1920800.0, cumulative cost = {0}, id = 56
HiveProjectRel(d_date_sk=[$0],
d_year=[$6]): rowcount = 652.2232142857142, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1129
HiveFilterRel(condition=[=($6,
2000)]): rowcount = 652.2232142857142, cumulative cost = {0.0 rows, 0.0 cpu,
0.0 io}, id = 1127
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.date_dim]]):
rowcount = 73049.0, cumulative cost = {0}, id = 65
HiveProjectRel(s_store_sk=[$0],
s_store_name=[$5], s_zip=[$25]): rowcount = 212.0, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 1235
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store]]): rowcount
= 212.0, cumulative cost = {0}, id = 54
HiveJoinRel(condition=[=($6, $10)],
joinType=[inner]): rowcount = 1600000.0, cumulative cost = {6866898.0 rows, 0.0
cpu, 0.0 io}, id = 2294
HiveJoinRel(condition=[=($7, $8)],
joinType=[inner]): rowcount = 1600000.0, cumulative cost = {5193849.0 rows, 0.0
cpu, 0.0 io}, id = 2292
HiveJoinRel(condition=[=($3, $0)],
joinType=[inner]): rowcount = 1600000.0, cumulative cost = {3520800.0 rows, 0.0
cpu, 0.0 io}, id = 1886
HiveProjectRel(cd_demo_sk=[$0],
cd_marital_status=[$2]): rowcount = 1920800.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1239
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_demographics]]):
rowcount = 1920800.0, cumulative cost = {0}, id = 56
HiveProjectRel(c_customer_sk=[$0], c_current_cdemo_sk=[$2],
c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5],
c_first_sales_date_sk=[$6]): rowcount = 1600000.0, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 1217
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer]]):
rowcount = 1600000.0, cumulative cost = {0}, id = 59
HiveProjectRel(d_date_sk=[$0],
d_year=[$6]): rowcount = 73049.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0
io}, id = 1227
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.date_dim]]):
rowcount = 73049.0, cumulative cost = {0}, id = 65
HiveProjectRel(d_date_sk=[$0],
d_year=[$6]): rowcount = 73049.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0
io}, id = 1227
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.date_dim]]):
rowcount = 73049.0, cumulative cost = {0}, id = 65
HiveProjectRel(sr_item_sk=[$1],
sr_ticket_number=[$8]): rowcount = 5.5578005E7, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 912
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store_returns]]):
rowcount = 5.5578005E7, cumulative cost = {0}, id = 62
HiveJoinRel(condition=[=($1, $2)],
joinType=[inner]): rowcount = 7200.0, cumulative cost = {7220.0 rows, 0.0 cpu,
0.0 io}, id = 3424
HiveProjectRel(hd_demo_sk=[$0],
hd_income_band_sk=[$1]): rowcount = 7200.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1253
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.household_demographics]]):
rowcount = 7200.0, cumulative cost = {0}, id = 53
HiveProjectRel(ib_income_band_sk=[$0]):
rowcount = 20.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1269
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.income_band]]):
rowcount = 20.0, cumulative cost = {0}, id = 63
HiveProjectRel(cs_item_sk=[$0]): rowcount = 1.0,
cumulative cost = {3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1201
HiveFilterRel(condition=[>($1, *(CAST(2):DOUBLE
NOT NULL, $2))]): rowcount = 1.0, cumulative cost = {3.15348608E8 rows, 0.0
cpu, 0.0 io}, id = 1199
HiveAggregateRel(group=[{0}],
agg#0=[sum($1)], agg#1=[sum($2)]): rowcount = 38846.0, cumulative cost =
{3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1197
HiveProjectRel($f0=[$0], $f1=[$2],
$f2=[+(+($5, $6), $7)]): rowcount = 6.692553251460564E8, cumulative cost =
{3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1195
HiveProjectRel(cs_item_sk=[$0],
cs_order_number=[$1], cs_ext_list_price=[$2], cr_item_sk=[$3],
cr_order_number=[$4], cr_refunded_cash=[$5], cr_reversed_charge=[$6],
cr_store_credit=[$7]): rowcount = 6.692553251460564E8, cumulative cost =
{3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1446
HiveJoinRel(condition=[AND(=($0, $3),
=($1, $4))], joinType=[inner]): rowcount = 6.692553251460564E8, cumulative cost
= {3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1441
HiveProjectRel(cs_item_sk=[$14],
cs_order_number=[$16], cs_ext_list_price=[$24]): rowcount = 2.86549727E8,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1283
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.catalog_sales]]):
rowcount = 2.86549727E8, cumulative cost = {0}, id = 45
HiveProjectRel(cr_item_sk=[$1],
cr_order_number=[$15], cr_refunded_cash=[$22], cr_reversed_charge=[$23],
cr_store_credit=[$24]): rowcount = 2.8798881E7, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 1285
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.catalog_returns]]):
rowcount = 2.8798881E7, cumulative cost = {0}, id = 46
HiveProjectRel(p_promo_sk=[$0]): rowcount = 450.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1249
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.promotion]]):
rowcount = 450.0, cumulative cost = {0}, id = 58
HiveJoinRel(condition=[=($1, $2)], joinType=[inner]):
rowcount = 7200.0, cumulative cost = {7220.0 rows, 0.0 cpu, 0.0 io}, id = 3480
HiveProjectRel(hd_demo_sk=[$0],
hd_income_band_sk=[$1]): rowcount = 7200.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1253
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.household_demographics]]):
rowcount = 7200.0, cumulative cost = {0}, id = 53
HiveProjectRel(ib_income_band_sk=[$0]): rowcount =
20.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1269
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.income_band]]):
rowcount = 20.0, cumulative cost = {0}, id = 63
HiveProjectRel(i_item_sk=[$0], i_current_price=[$5],
i_color=[$17], i_product_name=[$21]): rowcount = 1.0, cumulative cost = {0.0
rows, 0.0 cpu, 0.0 io}, id = 1185
HiveFilterRel(condition=[AND(in($17, 'maroon',
'burnished', 'dim', 'steel', 'navajo', 'chocolate'), between(false, $5, 35,
+(35, 10)), between(false, $5, +(35, 1), +(35, 15)))]): rowcount = 1.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1183
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.item]]): rowcount =
48000.0, cumulative cost = {0}, id = 68
HiveProjectRel(ca_address_sk=[$0], ca_street_number=[$2],
ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]): rowcount = 800000.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1261
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_address]]):
rowcount = 800000.0, cumulative cost = {0}, id = 61
HiveProjectRel(ca_address_sk=[$0], ca_street_number=[$2],
ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]): rowcount = 800000.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1261
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_address]]):
rowcount = 800000.0, cumulative cost = {0}, id = 61
HiveProjectRel(item_sk=[$1], store_name=[$2], store_zip=[$3],
syear=[$12], cnt=[$15], s1=[$16], s2=[$17], s3=[$18]): rowcount = 800000.0,
cumulative cost = {6.214584283941706E9 rows, 0.0 cpu, 0.0 io}, id = 1303
HiveAggregateRel(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14}], agg#0=[count()], agg#1=[sum($15)], agg#2=[sum($16)],
agg#3=[sum($17)]): rowcount = 800000.0, cumulative cost = {6.214584283941706E9
rows, 0.0 cpu, 0.0 io}, id = 1301
HiveProjectRel($f0=[$53], $f1=[$50], $f2=[$27], $f3=[$28],
$f4=[$39], $f5=[$40], $f6=[$41], $f7=[$42], $f8=[$44], $f9=[$45], $f10=[$46],
$f11=[$47], $f12=[$21], $f13=[$23], $f14=[$25], $f15=[$8], $f16=[$9],
$f17=[$10]): rowcount = 3.1094552702569574E8, cumulative cost =
{6.214584283941706E9 rows, 0.0 cpu, 0.0 io}, id = 1299
HiveProjectRel(ss_item_sk=[$0], ss_customer_sk=[$1],
ss_cdemo_sk=[$2], ss_hdemo_sk=[$3], ss_addr_sk=[$4], ss_store_sk=[$5],
ss_promo_sk=[$6], ss_ticket_number=[$7], ss_wholesale_cost=[$8],
ss_list_price=[$9], ss_coupon_amt=[$10], ss_sold_date_sk=[$11],
sr_item_sk=[$12], sr_ticket_number=[$13], c_customer_sk=[$14],
c_current_cdemo_sk=[$15], c_current_hdemo_sk=[$16], c_current_addr_sk=[$17],
c_first_shipto_date_sk=[$18], c_first_sales_date_sk=[$19], d_date_sk=[$20],
d_year=[$21], d_date_sk0=[$22], d_year0=[$23], d_date_sk1=[$24], d_year1=[$25],
s_store_sk=[$26], s_store_name=[$27], s_zip=[$28], cd_demo_sk=[$29],
cd_marital_status=[$30], cd_demo_sk0=[$31], cd_marital_status0=[$32],
p_promo_sk=[$37], hd_demo_sk=[$33], hd_income_band_sk=[$34], hd_demo_sk0=[$38],
hd_income_band_sk0=[$39], ca_address_sk=[$45], ca_street_number=[$46],
ca_street_name=[$47], ca_city=[$48], ca_zip=[$49], ca_address_sk0=[$50],
ca_street_number0=[$51], ca_street_name0=[$52], ca_city0=[$53], ca_zip0=[$54],
ib_income_band_sk=[$35], ib_income_band_sk0=[$40], i_item_sk=[$41],
i_current_price=[$42], i_color=[$43], i_product_name=[$44], cs_item_sk=[$36]):
rowcount = 3.1094552702569574E8, cumulative cost = {6.214584283941706E9 rows,
0.0 cpu, 0.0 io}, id = 3002
HiveJoinRel(condition=[=($17, $50)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {6.214584283941706E9 rows,
0.0 cpu, 0.0 io}, id = 3000
HiveJoinRel(condition=[=($4, $45)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {5.90283875691601E9 rows,
0.0 cpu, 0.0 io}, id = 2998
HiveJoinRel(condition=[=($0, $41)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {5.591093229890314E9 rows,
0.0 cpu, 0.0 io}, id = 2996
HiveJoinRel(condition=[=($16, $38)], joinType=[inner]):
rowcount = 3.1094552702569574E8, cumulative cost = {5.280147701864618E9 rows,
0.0 cpu, 0.0 io}, id = 2994
HiveJoinRel(condition=[=($6, $37)],
joinType=[inner]): rowcount = 4.015947855294367E8, cumulative cost =
{4.878545716335181E9 rows, 0.0 cpu, 0.0 io}, id = 2991
HiveJoinRel(condition=[=($0, $36)],
joinType=[inner]): rowcount = 4.015947855294367E8, cumulative cost =
{4.476950480805744E9 rows, 0.0 cpu, 0.0 io}, id = 2989
HiveJoinRel(condition=[=($3, $33)],
joinType=[inner]): rowcount = 4.015947855294367E8, cumulative cost =
{4.0753556942763076E9 rows, 0.0 cpu, 0.0 io}, id = 2987
HiveFilterRel(condition=[<>($30, $32)]):
rowcount = 5.186708209219766E8, cumulative cost = {3.556677673354331E9 rows,
0.0 cpu, 0.0 io}, id = 1247
HiveProjectRel(ss_item_sk=[$0],
ss_customer_sk=[$1], ss_cdemo_sk=[$2], ss_hdemo_sk=[$3], ss_addr_sk=[$4],
ss_store_sk=[$5], ss_promo_sk=[$6], ss_ticket_number=[$7],
ss_wholesale_cost=[$8], ss_list_price=[$9], ss_coupon_amt=[$10],
ss_sold_date_sk=[$11], sr_item_sk=[$31], sr_ticket_number=[$32],
c_customer_sk=[$21], c_current_cdemo_sk=[$22], c_current_hdemo_sk=[$23],
c_current_addr_sk=[$24], c_first_shipto_date_sk=[$25],
c_first_sales_date_sk=[$26], d_date_sk=[$14], d_year=[$15], d_date_sk0=[$27],
d_year0=[$28], d_date_sk1=[$29], d_year1=[$30], s_store_sk=[$16],
s_store_name=[$17], s_zip=[$18], cd_demo_sk=[$12], cd_marital_status=[$13],
cd_demo_sk0=[$19], cd_marital_status0=[$20]): rowcount = 3.6306957464538364E9,
cumulative cost = {3.556677673354331E9 rows, 0.0 cpu, 0.0 io}, id = 1873
HiveJoinRel(condition=[AND(=($0, $31),
=($7, $32))], joinType=[inner]): rowcount = 3.6306957464538364E9, cumulative
cost = {3.556677673354331E9 rows, 0.0 cpu, 0.0 io}, id = 1871
HiveJoinRel(condition=[=($1, $21)],
joinType=[inner]): rowcount = 1.297271788131117E9, cumulative cost =
{2.203827880223214E9 rows, 0.0 cpu, 0.0 io}, id = 1869
HiveJoinRel(condition=[=($5, $16)],
joinType=[inner]): rowcount = 5.50076554E8, cumulative cost =
{1.6521513262232141E9 rows, 0.0 cpu, 0.0 io}, id = 1862
HiveJoinRel(condition=[=($11, $14)],
joinType=[inner]): rowcount = 5.50076554E8, cumulative cost =
{1.1020745602232141E9 rows, 0.0 cpu, 0.0 io}, id = 1860
HiveJoinRel(condition=[=($2, $12)],
joinType=[inner]): rowcount = 5.50076554E8, cumulative cost = {5.51997354E8
rows, 0.0 cpu, 0.0 io}, id = 1450
HiveProjectRel(ss_item_sk=[$1],
ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5],
ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8],
ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$18],
ss_sold_date_sk=[$22]): rowcount = 5.50076554E8, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 909
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store_sales]]):
rowcount = 5.50076554E8, cumulative cost = {0}, id = 55
HiveProjectRel(cd_demo_sk=[$0],
cd_marital_status=[$2]): rowcount = 1920800.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1239
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_demographics]]):
rowcount = 1920800.0, cumulative cost = {0}, id = 56
HiveProjectRel(d_date_sk=[$0],
d_year=[$6]): rowcount = 652.2232142857142, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1223
HiveFilterRel(condition=[=($6,
+(2000, 1))]): rowcount = 652.2232142857142, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1221
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.date_dim]]):
rowcount = 73049.0, cumulative cost = {0}, id = 65
HiveProjectRel(s_store_sk=[$0],
s_store_name=[$5], s_zip=[$25]): rowcount = 212.0, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 1235
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store]]): rowcount
= 212.0, cumulative cost = {0}, id = 54
HiveJoinRel(condition=[=($6, $10)],
joinType=[inner]): rowcount = 1600000.0, cumulative cost = {6866898.0 rows, 0.0
cpu, 0.0 io}, id = 1867
HiveJoinRel(condition=[=($7, $8)],
joinType=[inner]): rowcount = 1600000.0, cumulative cost = {5193849.0 rows, 0.0
cpu, 0.0 io}, id = 1865
HiveJoinRel(condition=[=($3, $0)],
joinType=[inner]): rowcount = 1600000.0, cumulative cost = {3520800.0 rows, 0.0
cpu, 0.0 io}, id = 1459
HiveProjectRel(cd_demo_sk=[$0],
cd_marital_status=[$2]): rowcount = 1920800.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1239
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_demographics]]):
rowcount = 1920800.0, cumulative cost = {0}, id = 56
HiveProjectRel(c_customer_sk=[$0], c_current_cdemo_sk=[$2],
c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5],
c_first_sales_date_sk=[$6]): rowcount = 1600000.0, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 1217
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer]]):
rowcount = 1600000.0, cumulative cost = {0}, id = 59
HiveProjectRel(d_date_sk=[$0],
d_year=[$6]): rowcount = 73049.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0
io}, id = 1227
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.date_dim]]):
rowcount = 73049.0, cumulative cost = {0}, id = 65
HiveProjectRel(d_date_sk=[$0],
d_year=[$6]): rowcount = 73049.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0
io}, id = 1227
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.date_dim]]):
rowcount = 73049.0, cumulative cost = {0}, id = 65
HiveProjectRel(sr_item_sk=[$1],
sr_ticket_number=[$8]): rowcount = 5.5578005E7, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 912
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store_returns]]):
rowcount = 5.5578005E7, cumulative cost = {0}, id = 62
HiveJoinRel(condition=[=($1, $2)],
joinType=[inner]): rowcount = 7200.0, cumulative cost = {7220.0 rows, 0.0 cpu,
0.0 io}, id = 2722
HiveProjectRel(hd_demo_sk=[$0],
hd_income_band_sk=[$1]): rowcount = 7200.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1253
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.household_demographics]]):
rowcount = 7200.0, cumulative cost = {0}, id = 53
HiveProjectRel(ib_income_band_sk=[$0]):
rowcount = 20.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1269
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.income_band]]):
rowcount = 20.0, cumulative cost = {0}, id = 63
HiveProjectRel(cs_item_sk=[$0]): rowcount = 1.0,
cumulative cost = {3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1295
HiveFilterRel(condition=[>($1, *(CAST(2):DOUBLE
NOT NULL, $2))]): rowcount = 1.0, cumulative cost = {3.15348608E8 rows, 0.0
cpu, 0.0 io}, id = 1293
HiveAggregateRel(group=[{0}],
agg#0=[sum($1)], agg#1=[sum($2)]): rowcount = 38846.0, cumulative cost =
{3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1291
HiveProjectRel($f0=[$0], $f1=[$2],
$f2=[+(+($5, $6), $7)]): rowcount = 6.692553251460564E8, cumulative cost =
{3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1289
HiveProjectRel(cs_item_sk=[$0],
cs_order_number=[$1], cs_ext_list_price=[$2], cr_item_sk=[$3],
cr_order_number=[$4], cr_refunded_cash=[$5], cr_reversed_charge=[$6],
cr_store_credit=[$7]): rowcount = 6.692553251460564E8, cumulative cost =
{3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1439
HiveJoinRel(condition=[AND(=($0, $3),
=($1, $4))], joinType=[inner]): rowcount = 6.692553251460564E8, cumulative cost
= {3.15348608E8 rows, 0.0 cpu, 0.0 io}, id = 1434
HiveProjectRel(cs_item_sk=[$14],
cs_order_number=[$16], cs_ext_list_price=[$24]): rowcount = 2.86549727E8,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1283
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.catalog_sales]]):
rowcount = 2.86549727E8, cumulative cost = {0}, id = 45
HiveProjectRel(cr_item_sk=[$1],
cr_order_number=[$15], cr_refunded_cash=[$22], cr_reversed_charge=[$23],
cr_store_credit=[$24]): rowcount = 2.8798881E7, cumulative cost = {0.0 rows,
0.0 cpu, 0.0 io}, id = 1285
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.catalog_returns]]):
rowcount = 2.8798881E7, cumulative cost = {0}, id = 46
HiveProjectRel(p_promo_sk=[$0]): rowcount = 450.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1249
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.promotion]]):
rowcount = 450.0, cumulative cost = {0}, id = 58
HiveJoinRel(condition=[=($1, $2)], joinType=[inner]):
rowcount = 7200.0, cumulative cost = {7220.0 rows, 0.0 cpu, 0.0 io}, id = 2778
HiveProjectRel(hd_demo_sk=[$0],
hd_income_band_sk=[$1]): rowcount = 7200.0, cumulative cost = {0.0 rows, 0.0
cpu, 0.0 io}, id = 1253
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.household_demographics]]):
rowcount = 7200.0, cumulative cost = {0}, id = 53
HiveProjectRel(ib_income_band_sk=[$0]): rowcount =
20.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1269
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.income_band]]):
rowcount = 20.0, cumulative cost = {0}, id = 63
HiveProjectRel(i_item_sk=[$0], i_current_price=[$5],
i_color=[$17], i_product_name=[$21]): rowcount = 1.0, cumulative cost = {0.0
rows, 0.0 cpu, 0.0 io}, id = 1279
HiveFilterRel(condition=[AND(in($17, 'maroon',
'burnished', 'dim', 'steel', 'navajo', 'chocolate'), between(false, $5, 35,
+(35, 10)), between(false, $5, +(35, 1), +(35, 15)))]): rowcount = 1.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1277
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.item]]): rowcount =
48000.0, cumulative cost = {0}, id = 68
HiveProjectRel(ca_address_sk=[$0], ca_street_number=[$2],
ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]): rowcount = 800000.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1261
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_address]]):
rowcount = 800000.0, cumulative cost = {0}, id = 61
HiveProjectRel(ca_address_sk=[$0], ca_street_number=[$2],
ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]): rowcount = 800000.0,
cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 1261
HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.customer_address]]):
rowcount = 800000.0, cumulative cost = {0}, id = 61
{code}
> CBO : Negate condition underestimates selectivity which results in an
> in-efficient plan
> ---------------------------------------------------------------------------------------
>
> Key: HIVE-8315
> URL: https://issues.apache.org/jira/browse/HIVE-8315
> Project: Hive
> Issue Type: Bug
> Components: CBO
> Affects Versions: 0.14.0
> Reporter: Mostafa Mokhtar
> Assignee: Harish Butani
> Fix For: 0.14.0
>
> Attachments: HIVE-8315.1.patch
>
>
> For TPC-DS Q64 the predicate cd1.cd_marital_status <> cd2.cd_marital_status
> under estimate the join selectivity by a huge margin and results in
> in-efficient join order.
> This is a subset of the logical plan showing that item was joined very last
> {code}
> HiveJoinRel(condition=[=($0, $37)],
> joinType=[inner]): rowcount = 1.0, cumulative cost = {6.386017602518958E8
> rows, 0.0 cpu, 0.0 io}, id = 3790
> HiveJoinRel(condition=[=($0, $33)],
> joinType=[inner]): rowcount = 1.0, cumulative cost = {6.386017582518958E8
> rows, 0.0 cpu, 0.0 io}, id = 3067
> HiveFilterRel(condition=[<>($30, $32)]):
> rowcount = 1.8252236387887635, cumulative cost = {6.386017554266721E8 rows,
> 0.0 cpu, 0.0 io}, id = 1153
> HiveProjectRel(ss_item_sk=[$2],
> ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6],
> ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9],
> ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12],
> ss_sold_date_sk=[$13], sr_item_sk=[$0], sr_ticket_number=[$1],
> c_customer_sk=[$23], c_current_cdemo_sk=[$24], c_current_hdemo_sk=[$25],
> c_current_addr_sk=[$26], c_first_shipto_date_sk=[$27],
> c_first_sales_date_sk=[$28], d_date_sk=[$14], d_year=[$15], d_date_sk0=[$29],
> d_year0=[$30], d_date_sk1=[$31], d_year1=[$32], s_store_sk=[$18],
> s_store_name=[$19], s_zip=[$20], cd_demo_sk=[$16], cd_marital_status=[$17],
> cd_demo_sk0=[$21], cd_marital_status0=[$22]): rowcount =
> 3.6246005783468924E7, cumulative cost = {6.386017554266721E8 rows, 0.0 cpu,
> 0.0 io}, id = 2312
> HiveJoinRel(condition=[AND(=($2, $0),
> =($9, $1))], joinType=[inner]): rowcount = 3.6246005783468924E7, cumulative
> cost = {6.386017554266721E8 rows, 0.0 cpu, 0.0 io}, id = 2310
> HiveProjectRel(sr_item_sk=[$1],
> sr_ticket_number=[$8]): rowcount = 5.5578005E7, cumulative cost = {0.0 rows,
> 0.0 cpu, 0.0 io}, id = 912
>
> HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store_returns]]):
> rowcount = 5.5578005E7, cumulative cost = {0}, id = 62
> HiveJoinRel(condition=[=($1, $21)],
> joinType=[inner]): rowcount = 1.2950939439433252E7, cumulative cost =
> {5.700728109872389E8 rows, 0.0 cpu, 0.0 io}, id = 2308
> HiveJoinRel(condition=[=($5,
> $16)], joinType=[inner]): rowcount = 5491530.921341597, cumulative cost =
> {5.629812800658973E8 rows, 0.0 cpu, 0.0 io}, id = 2301
> HiveJoinRel(condition=[=($2,
> $14)], joinType=[inner]): rowcount = 5491530.921341597, cumulative cost =
> {5.574895371445558E8 rows, 0.0 cpu, 0.0 io}, id = 2299
> HiveJoinRel(condition=[=($11,
> $12)], joinType=[inner]): rowcount = 5491530.921341597, cumulative cost =
> {5.500772062232143E8 rows, 0.0 cpu, 0.0 io}, id = 1898
>
> HiveProjectRel(ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3],
> ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_promo_sk=[$7],
> ss_ticket_number=[$8], ss_wholesale_cost=[$10], ss_list_price=[$11],
> ss_coupon_amt=[$18], ss_sold_date_sk=[$22]): rowcount = 5.50076554E8,
> cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io}, id = 909
>
> HiveTableScanRel(table=[[tpcds_bin_partitioned_orc_200_orig.store_sales]]):
> rowcount = 5.50076554E8, cumulative cost = {0}, id = 55{code}
> Query
> {code}
> select cs1.product_name ,cs1.store_name ,cs1.store_zip ,cs1.b_street_number
> ,cs1.b_streen_name ,cs1.b_city
> ,cs1.b_zip ,cs1.c_street_number ,cs1.c_street_name ,cs1.c_city
> ,cs1.c_zip ,cs1.syear ,cs1.cnt
> ,cs1.s1 ,cs1.s2 ,cs1.s3
> ,cs2.s1 ,cs2.s2 ,cs2.s3 ,cs2.syear ,cs2.cnt
> from
> (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as
> store_name
> ,s_zip as store_zip ,ad1.ca_street_number as b_street_number
> ,ad1.ca_street_name as b_streen_name
> ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as
> c_street_number
> ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip
> as c_zip
> ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*)
> as cnt
> ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2
> ,sum(ss_coupon_amt) as s3
> FROM store_sales
> JOIN store_returns ON store_sales.ss_item_sk =
> store_returns.sr_item_sk and store_sales.ss_ticket_number =
> store_returns.sr_ticket_number
> JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk
> JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk
> JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk
> JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk
> JOIN store ON store_sales.ss_store_sk = store.s_store_sk
> JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk=
> cd1.cd_demo_sk
> JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk =
> cd2.cd_demo_sk
> JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk
> JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk =
> hd1.hd_demo_sk
> JOIN household_demographics hd2 ON customer.c_current_hdemo_sk =
> hd2.hd_demo_sk
> JOIN customer_address ad1 ON store_sales.ss_addr_sk =
> ad1.ca_address_sk
> JOIN customer_address ad2 ON customer.c_current_addr_sk =
> ad2.ca_address_sk
> JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk
> JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk
> JOIN item ON store_sales.ss_item_sk = item.i_item_sk
> JOIN
> (select cs_item_sk
> ,sum(cs_ext_list_price) as
> sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund
> from catalog_sales JOIN catalog_returns
> ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk
> and catalog_sales.cs_order_number = catalog_returns.cr_order_number
> group by cs_item_sk
> having
> sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit))
> cs_ui
> ON store_sales.ss_item_sk = cs_ui.cs_item_sk
> WHERE
> cd1.cd_marital_status <> cd2.cd_marital_status and
> i_color in ('maroon','burnished','dim','steel','navajo','chocolate')
> and
> i_current_price between 35 and 35 + 10 and
> i_current_price between 35 + 1 and 35 + 15
> group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number
> ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number
> ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year
> ,d3.d_year
> ) cs1
> JOIN
> (select i_product_name as product_name ,i_item_sk as item_sk ,s_store_name as
> store_name
> ,s_zip as store_zip ,ad1.ca_street_number as b_street_number
> ,ad1.ca_street_name as b_streen_name
> ,ad1.ca_city as b_city ,ad1.ca_zip as b_zip ,ad2.ca_street_number as
> c_street_number
> ,ad2.ca_street_name as c_street_name ,ad2.ca_city as c_city ,ad2.ca_zip
> as c_zip
> ,d1.d_year as syear ,d2.d_year as fsyear ,d3.d_year as s2year ,count(*)
> as cnt
> ,sum(ss_wholesale_cost) as s1 ,sum(ss_list_price) as s2
> ,sum(ss_coupon_amt) as s3
> FROM store_sales
> JOIN store_returns ON store_sales.ss_item_sk =
> store_returns.sr_item_sk and store_sales.ss_ticket_number =
> store_returns.sr_ticket_number
> JOIN customer ON store_sales.ss_customer_sk = customer.c_customer_sk
> JOIN date_dim d1 ON store_sales.ss_sold_date_sk = d1.d_date_sk
> JOIN date_dim d2 ON customer.c_first_sales_date_sk = d2.d_date_sk
> JOIN date_dim d3 ON customer.c_first_shipto_date_sk = d3.d_date_sk
> JOIN store ON store_sales.ss_store_sk = store.s_store_sk
> JOIN customer_demographics cd1 ON store_sales.ss_cdemo_sk=
> cd1.cd_demo_sk
> JOIN customer_demographics cd2 ON customer.c_current_cdemo_sk =
> cd2.cd_demo_sk
> JOIN promotion ON store_sales.ss_promo_sk = promotion.p_promo_sk
> JOIN household_demographics hd1 ON store_sales.ss_hdemo_sk =
> hd1.hd_demo_sk
> JOIN household_demographics hd2 ON customer.c_current_hdemo_sk =
> hd2.hd_demo_sk
> JOIN customer_address ad1 ON store_sales.ss_addr_sk =
> ad1.ca_address_sk
> JOIN customer_address ad2 ON customer.c_current_addr_sk =
> ad2.ca_address_sk
> JOIN income_band ib1 ON hd1.hd_income_band_sk = ib1.ib_income_band_sk
> JOIN income_band ib2 ON hd2.hd_income_band_sk = ib2.ib_income_band_sk
> JOIN item ON store_sales.ss_item_sk = item.i_item_sk
> JOIN
> (select cs_item_sk
> ,sum(cs_ext_list_price) as
> sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund
> from catalog_sales JOIN catalog_returns
> ON catalog_sales.cs_item_sk = catalog_returns.cr_item_sk
> and catalog_sales.cs_order_number = catalog_returns.cr_order_number
> group by cs_item_sk
> having
> sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit))
> cs_ui
> ON store_sales.ss_item_sk = cs_ui.cs_item_sk
> WHERE
> cd1.cd_marital_status <> cd2.cd_marital_status and
> i_color in ('maroon','burnished','dim','steel','navajo','chocolate')
> and
> i_current_price between 35 and 35 + 10 and
> i_current_price between 35 + 1 and 35 + 15
> group by i_product_name ,i_item_sk ,s_store_name ,s_zip ,ad1.ca_street_number
> ,ad1.ca_street_name ,ad1.ca_city ,ad1.ca_zip ,ad2.ca_street_number
> ,ad2.ca_street_name ,ad2.ca_city ,ad2.ca_zip ,d1.d_year ,d2.d_year
> ,d3.d_year
> ) cs2
> ON cs1.item_sk=cs2.item_sk
> where
> cs1.syear = 2000 and
> cs2.syear = 2000 + 1 and
> cs2.cnt <= cs1.cnt and
> cs1.store_name = cs2.store_name and
> cs1.store_zip = cs2.store_zip
> order by cs1.product_name ,cs1.store_name ,cs2.cnt
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)