Repository: hive Updated Branches: refs/heads/master b42fdc20d -> 9493dcfd4
http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/query54.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index a706e94..a029634 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,6 +1,6 @@ Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with my_customers as ( @@ -133,28 +133,26 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE) -Map 16 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) -Map 22 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) +Map 15 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) +Map 21 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 17 <- Map 22 (SIMPLE_EDGE), Union 16 (SIMPLE_EDGE) +Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 27 (SIMPLE_EDGE) -Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 26 (SIMPLE_EDGE) Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 27 (SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Map 27 (SIMPLE_EDGE) -Reducer 33 <- Reducer 32 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 26 (SIMPLE_EDGE) +Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) 
+Reducer 4 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -164,25 +162,25 @@ Stage-0 limit:100 Stage-1 Reducer 9 vectorized - File Output Operator [FS_351] - Limit [LIM_350] (rows=1 width=16) + File Output Operator [FS_349] + Limit [LIM_348] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_349] (rows=1 width=16) + Select Operator [SEL_347] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - Select Operator [SEL_347] (rows=1 width=16) + SHUFFLE [RS_346] + Select Operator [SEL_345] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_346] (rows=1 width=12) + Group By Operator [GBY_344] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] + SHUFFLE [RS_343] PartitionCols:_col0 - Group By Operator [GBY_344] (rows=1 width=12) + Group By Operator [GBY_342] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_343] (rows=1 width=116) + Select Operator [SEL_341] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_342] (rows=1 width=116) + Group By Operator [GBY_340] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_119] @@ -197,82 +195,82 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"] Merge Join Operator [MERGEJOIN_273] (rows=5618315000 width=127) Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] Merge Join Operator [MERGEJOIN_270] 
(rows=25 width=4) Conds:(Right Outer),Output:["_col0"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - Group By Operator [GBY_332] (rows=25 width=4) + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_331] + Group By Operator [GBY_330] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] PartitionCols:_col0 - Group By Operator [GBY_318] (rows=25 width=4) + Group By Operator [GBY_316] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_315] (rows=50 width=12) + Select Operator [SEL_313] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_313] (rows=50 width=12) + Filter Operator [FIL_311] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_73] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_341] - Select Operator [SEL_340] (rows=1 width=8) - Filter Operator [FIL_339] (rows=1 width=8) + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] + Select Operator [SEL_338] (rows=1 width=8) + Filter Operator [FIL_337] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_338] (rows=1 width=8) + Group By Operator [GBY_336] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] - Group By Operator [GBY_336] (rows=1 width=8) + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_335] + Group By Operator [GBY_334] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_335] (rows=25 width=4) - Group By Operator [GBY_334] (rows=25 width=4) + Select Operator [SEL_333] (rows=25 width=4) + Group By Operator [GBY_332] (rows=25 width=4) 
Output:["_col0"],keys:KEY._col0 - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Group By Operator [GBY_319] (rows=25 width=4) + Group By Operator [GBY_317] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_316] (rows=50 width=12) + Select Operator [SEL_314] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_313] + Please refer to the previous Filter Operator [FIL_311] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_113] Select Operator [SEL_108] (rows=224732600 width=119) Output:["_col0","_col4","_col11","_col13"] Merge Join Operator [MERGEJOIN_272] (rows=224732600 width=119) Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] - Group By Operator [GBY_323] (rows=25 width=4) + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_323] + Group By Operator [GBY_321] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] PartitionCols:_col0 - Group By Operator [GBY_317] (rows=25 width=4) + Group By Operator [GBY_315] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_314] (rows=50 width=12) + Select Operator [SEL_312] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_313] + Please refer to the previous Filter Operator [FIL_311] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_105] Merge Join Operator [MERGEJOIN_271] (rows=8989304 width=8) Conds:(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] - Select Operator [SEL_330] (rows=1 width=8) - Filter Operator [FIL_329] (rows=1 width=8) + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_329] + Select 
Operator [SEL_328] (rows=1 width=8) + Filter Operator [FIL_327] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_328] (rows=1 width=8) + Group By Operator [GBY_326] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_327] - Group By Operator [GBY_326] (rows=1 width=8) + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_325] + Group By Operator [GBY_324] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_324] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_323] + Select Operator [SEL_322] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_321] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_102] Merge Join Operator [MERGEJOIN_269] (rows=8989304 width=8) @@ -281,145 +279,134 @@ Stage-0 SHUFFLE [RS_100] PartitionCols:_col5 Merge Join Operator [MERGEJOIN_268] (rows=55046 width=4) - Conds:RS_69._col0=RS_304._col1(Inner),Output:["_col5"] + Conds:RS_69._col0=RS_310._col1(Inner),Output:["_col5"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_264] (rows=39720279 width=4) - Conds:RS_286._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col0"] + Conds:RS_292._col1, _col2=RS_295._col0, _col1(Inner),Output:["_col0"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + SHUFFLE [RS_292] PartitionCols:_col1, _col2 - Select Operator [SEL_285] (rows=40000000 width=188) + Select Operator [SEL_291] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_284] (rows=40000000 width=188) + Filter Operator [FIL_290] (rows=40000000 width=188) predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) TableScan [TS_29] (rows=40000000 width=188) 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0, _col1 - Select Operator [SEL_288] (rows=1704 width=184) + Select Operator [SEL_294] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_287] (rows=1704 width=184) + Filter Operator [FIL_293] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) TableScan [TS_32] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] PartitionCols:_col1 - Select Operator [SEL_303] (rows=55046 width=8) + Select Operator [SEL_309] (rows=55046 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_302] (rows=55046 width=8) + Group By Operator [GBY_308] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 20 [SIMPLE_EDGE] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0, _col1 Group By Operator [GBY_62] (rows=55046 width=8) Output:["_col0","_col1"],keys:_col6, _col5 Merge Join Operator [MERGEJOIN_267] (rows=110092 width=8) - Conds:RS_58._col1=RS_301._col0(Inner),Output:["_col5","_col6"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] + Conds:RS_58._col1=RS_307._col0(Inner),Output:["_col5","_col6"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] PartitionCols:_col0 - Select Operator [SEL_300] (rows=80000000 width=8) + Select Operator [SEL_306] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_299] (rows=80000000 width=8) + Filter Operator [FIL_305] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_49] (rows=80000000 width=8) 
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 19 [SIMPLE_EDGE] + <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_266] (rows=110092 width=0) - Conds:RS_55._col2=RS_298._col0(Inner),Output:["_col1"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + Conds:RS_55._col2=RS_304._col0(Inner),Output:["_col1"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] PartitionCols:_col0 - Select Operator [SEL_297] (rows=453 width=4) + Select Operator [SEL_303] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_296] (rows=453 width=186) + Filter Operator [FIL_302] (rows=453 width=186) predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) TableScan [TS_46] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_265] (rows=11665117 width=7) - Conds:Union 17._col0=RS_292._col0(Inner),Output:["_col1","_col2"] - <-Map 23 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_292] + Conds:Union 16._col0=RS_298._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_298] PartitionCols:_col0 - Select Operator [SEL_291] (rows=50 width=4) + Select Operator [SEL_297] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_290] (rows=50 width=12) + Filter Operator [FIL_296] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_43] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_357] + <-Union 16 [SIMPLE_EDGE] + <-Map 15 [CONTAINS] vectorized + Reduce Output Operator [RS_355] PartitionCols:_col0 - Select 
Operator [SEL_356] (rows=285117831 width=11) + Select Operator [SEL_354] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_355] (rows=285117831 width=11) + Filter Operator [FIL_353] (rows=285117831 width=11) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_274] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_353] - Group By Operator [GBY_352] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_351] + Group By Operator [GBY_350] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_301] + Group By Operator [GBY_300] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_293] (rows=50 width=4) + Select Operator [SEL_299] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_291] - <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_360] + Please refer to the previous Select Operator [SEL_297] + <-Map 21 [CONTAINS] vectorized + Reduce Output Operator [RS_358] PartitionCols:_col0 - Select Operator [SEL_359] (rows=143930993 width=11) + Select Operator [SEL_357] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_358] (rows=143930993 width=11) + Filter Operator [FIL_356] 
(rows=143930993 width=11) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) TableScan [TS_279] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_354] - Please refer to the previous Group By Operator [GBY_352] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_352] + Please refer to the previous Group By Operator [GBY_350] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_99] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_263] (rows=525327388 width=114) - Conds:RS_309._col0=RS_312._col0(Inner),Output:["_col1","_col2","_col4"] + Conds:RS_286._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_308] (rows=525327388 width=114) + Select Operator [SEL_285] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_307] (rows=525327388 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_100_customer_c_customer_sk_min) AND DynamicValue(RS_100_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_100_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + Filter Operator [FIL_284] (rows=525327388 width=114) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_23] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_183] - Group By Operator [GBY_182] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_181] (rows=55046 width=8) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_268] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + SHUFFLE [RS_289] PartitionCols:_col0 - Select Operator [SEL_311] (rows=73049 width=8) + Select Operator [SEL_288] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_310] (rows=73049 width=8) + Filter Operator [FIL_287] (rows=73049 width=8) predicate:d_date_sk is not null TableScan [TS_26] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] http://git-wip-us.apache.org/repos/asf/hive/blob/9493dcfd/ql/src/test/results/clientpositive/perf/tez/query8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/query8.q.out index 9eb5039..da49d2e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -227,32 +227,32 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Reducer 10 <- Union 9 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 10 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 10 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_146] - Limit [LIM_145] (rows=1 width=200) + File Output Operator [FS_149] + Limit [LIM_148] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_144] (rows=1 width=200) + Select Operator [SEL_147] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=200) + SHUFFLE [RS_146] + Group By Operator [GBY_145] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -263,47 +263,47 @@ Stage-0 keys:_col6,sort order:+,top n:100 Merge Join Operator [MERGEJOIN_118] (rows=1 width=200) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_130._col0=RS_133._col2(Inner),Output:["_col1","_col2"] + 
Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] + SHUFFLE [RS_144] PartitionCols:_col2 - Select Operator [SEL_132] (rows=1704 width=276) + Select Operator [SEL_143] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_131] (rows=1704 width=181) + Filter Operator [FIL_142] (rows=1704 width=181) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_129] (rows=1 width=184) + Select Operator [SEL_140] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_128] (rows=1 width=192) + Filter Operator [FIL_139] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_127] (rows=3098 width=192) + Group By Operator [GBY_138] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Union 9 [SIMPLE_EDGE] + <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_167] + Reduce Output Operator [RS_170] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=3098 width=192) + Group By Operator [GBY_169] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_165] (rows=1126 width=192) + Group By Operator [GBY_168] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_167] PartitionCols:_col0 - Group By Operator [GBY_163] (rows=1126 width=192) + Group By Operator [GBY_166] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_162] (rows=2253 width=97) + Select Operator 
[SEL_165] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_161] (rows=2253 width=97) + Filter Operator [FIL_164] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_160] (rows=6761 width=97) + Group By Operator [GBY_163] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] @@ -311,40 +311,40 @@ Stage-0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1"] + Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_155] (rows=40000000 width=93) + Select Operator [SEL_158] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=40000000 width=93) + Filter Operator [FIL_157] (rows=40000000 width=93) predicate:(ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_158] (rows=26666667 width=4) + Select Operator [SEL_161] (rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_157] (rows=26666667 width=89) + Filter Operator [FIL_160] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] - <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_153] + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_156] PartitionCols:_col0 - Group By Operator 
[GBY_152] (rows=3098 width=192) + Group By Operator [GBY_155] (rows=3098 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_151] (rows=5071 width=192) + Group By Operator [GBY_154] (rows=5071 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_149] (rows=70994 width=192) + Group By Operator [GBY_152] (rows=70994 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_148] (rows=20000000 width=89) + Select Operator [SEL_151] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_147] (rows=20000000 width=89) + Filter Operator [FIL_150] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736' , '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', 
'57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', 
'61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] @@ -352,34 +352,34 @@ Stage-0 SHUFFLE [RS_52] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_115] (rows=37399754 width=42) - Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2"] + Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_127] (rows=130 width=12) + predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + SHUFFLE [RS_137] PartitionCols:_col0 - Select Operator [SEL_137] (rows=525329897 width=114) + Select Operator [SEL_136] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_136] (rows=525329897 width=114) - predicate:((ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_135] (rows=525329897 width=114) + predicate:((ss_sold_date_sk BETWEEN 
DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_92] - Group By Operator [GBY_91] (rows=1 width=12) + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_90] (rows=1 width=8) + Select Operator [SEL_130] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_117] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] - PartitionCols:_col0 - Select Operator [SEL_140] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_139] (rows=130 width=12) - predicate:((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + Please refer to the previous Select Operator [SEL_128]