Repository: hive
Updated Branches:
  refs/heads/master-txnstats 174c6748f -> 651e79509
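The golden-file diffs below record the new Top N Key Operator (TNK_*) that now appears in these plans: the top-n keys implied by ORDER BY ... LIMIT are pushed below the Group By, so mappers forward only rows whose keys can still land in the final top n. A minimal HiveQL sketch of the kind of query these tests exercise follows; the SET line assumes the optimization is toggled by hive.optimize.topnkey, a property that does not appear in the diff text itself.

-- Sketch only: hive.optimize.topnkey is assumed to be the switch for the new operator.
SET hive.optimize.topnkey=true;

-- Same shape as the new topnkey.q test. With the optimization on, EXPLAIN shows a
-- Top N Key Operator (keys:_col0, sort order:+, top n:5) feeding the map-side
-- Group By Operator, so only rows whose key can still be among the smallest 5
-- keys are aggregated and shuffled.
EXPLAIN
SELECT key, SUM(CAST(SUBSTR(value,5) AS INT))
FROM src
GROUP BY key
ORDER BY key
LIMIT 5;
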
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/perf/tez/query82.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query82.q.out b/ql/src/test/results/clientpositive/perf/tez/query82.q.out index f1765e7..bb5a9e9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query82.q.out @@ -46,83 +46,85 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_96] - Limit [LIM_95] (rows=100 width=88) + File Output Operator [FS_97] + Limit [LIM_96] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_94] (rows=633595212 width=88) + Select Operator [SEL_95] (rows=633595212 width=88) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] - Group By Operator [GBY_92] (rows=633595212 width=88) + SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=633595212 width=88) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_21] (rows=1267190424 width=88) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Merge Join Operator [MERGEJOIN_72] (rows=1267190424 width=88) - Conds:RS_91._col0=RS_75._col0(Inner),RS_75._col0=RS_18._col1(Inner),Output:["_col2","_col3","_col4"] - <-Map 5 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_75] - PartitionCols:_col0 - Select Operator [SEL_74] (rows=25666 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_73] (rows=25666 width=1436) - predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] - <-Reducer 8 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_71] (rows=4593600 width=15) - Conds:RS_83._col0=RS_86._col0(Inner),Output:["_col1"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_86] - PartitionCols:_col0 - Select Operator [SEL_85] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_84] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_83] - PartitionCols:_col0 - Select Operator [SEL_82] (rows=4176000 width=15) - Output:["_col0","_col1"] - Filter Operator [FIL_81] (rows=4176000 width=15) - predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) - TableScan [TS_6] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_91] - PartitionCols:_col0 - Select Operator [SEL_90] (rows=575995635 width=88) - Output:["_col0"] - Filter Operator [FIL_89] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN 
DynamicValue(RS_18_inventory_inv_item_sk_min) AND DynamicValue(RS_18_inventory_inv_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_inventory_inv_item_sk_bloom_filter))) and ss_item_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST [RS_80] - Group By Operator [GBY_79] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_78] - Group By Operator [GBY_77] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_76] (rows=25666 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_74] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_88] - Group By Operator [GBY_87] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4593600)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_50] - Group By Operator [GBY_49] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4593600)"] - Select Operator [SEL_48] (rows=4593600 width=15) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_71] + Top N Key Operator [TNK_43] (rows=1267190424 width=88) + keys:_col2, _col3, _col4,sort order:+++,top n:100 + Merge Join Operator [MERGEJOIN_73] (rows=1267190424 width=88) + Conds:RS_92._col0=RS_76._col0(Inner),RS_76._col0=RS_18._col1(Inner),Output:["_col2","_col3","_col4"] + <-Map 5 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_76] + PartitionCols:_col0 + Select Operator [SEL_75] (rows=25666 width=1436) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_74] (rows=25666 width=1436) + predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] + <-Reducer 8 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_72] (rows=4593600 width=15) + Conds:RS_84._col0=RS_87._col0(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_87] + PartitionCols:_col0 + Select Operator [SEL_86] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_85] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) + TableScan [TS_9] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=4176000 width=15) + Output:["_col0","_col1"] + Filter Operator [FIL_82] (rows=4176000 width=15) + predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) + TableScan [TS_6] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE 
[RS_92] + PartitionCols:_col0 + Select Operator [SEL_91] (rows=575995635 width=88) + Output:["_col0"] + Filter Operator [FIL_90] (rows=575995635 width=88) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_18_inventory_inv_item_sk_min) AND DynamicValue(RS_18_inventory_inv_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_inventory_inv_item_sk_bloom_filter))) and ss_item_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk"] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_81] + Group By Operator [GBY_80] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_79] + Group By Operator [GBY_78] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_77] (rows=25666 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_75] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4593600)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_51] + Group By Operator [GBY_50] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4593600)"] + Select Operator [SEL_49] (rows=4593600 width=15) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_72] http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/perf/tez/query99.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/query99.q.out index b0f6a45..456fd8c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -86,128 +86,130 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_141] - Limit [LIM_140] (rows=100 width=135) + File Output Operator [FS_142] + Limit [LIM_141] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_139] (rows=210822976 width=135) + Select Operator [SEL_140] (rows=210822976 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - Select Operator [SEL_137] (rows=210822976 width=135) + SHUFFLE [RS_139] + Select Operator [SEL_138] (rows=210822976 width=135) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_136] (rows=210822976 width=135) + Group By Operator [GBY_137] (rows=210822976 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_29] 
(rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0, _col1, _col2 - Select Operator [SEL_27] (rows=421645953 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_100] (rows=421645953 width=135) - Conds:RS_24._col3=RS_127._col0(Inner),Output:["_col0","_col1","_col8","_col10","_col12"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_127] - PartitionCols:_col0 - Select Operator [SEL_126] (rows=1 width=0) - Output:["_col0","_col1"] - Filter Operator [FIL_125] (rows=1 width=0) - predicate:sm_ship_mode_sk is not null - TableScan [TS_12] (rows=1 width=0) - default@ship_mode,ship_mode,Tbl:PARTIAL,Col:NONE,Output:["sm_ship_mode_sk","sm_type"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_99] (rows=383314495 width=135) - Conds:RS_21._col4=RS_119._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] - PartitionCols:_col0 - Select Operator [SEL_118] (rows=27 width=1029) - Output:["_col0","_col1"] - Filter Operator [FIL_117] (rows=27 width=1029) - predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_98] (rows=348467716 width=135) - Conds:RS_18._col2=RS_111._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col8"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] - PartitionCols:_col0 - Select Operator [SEL_110] (rows=60 width=2045) - Output:["_col0","_col1"] - Filter Operator [FIL_109] (rows=60 width=2045) - predicate:cc_call_center_sk is not null - TableScan [TS_6] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_97] (rows=316788826 width=135) - Conds:RS_135._col1=RS_103._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] - PartitionCols:_col0 - Select Operator [SEL_102] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] - PartitionCols:_col1 - Select Operator [SEL_134] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_133] (rows=287989836 width=135) - predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (cs_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and 
in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_22_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_22_warehouse_w_warehouse_sk_max) and in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_22_warehouse_w_warehouse_sk_bloom_filter))) and cs_call_center_sk is not n ull and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_116] - Group By Operator [GBY_115] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] - Group By Operator [GBY_113] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_112] (rows=60 width=2045) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_110] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_124] - Group By Operator [GBY_123] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - Group By Operator [GBY_121] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_120] (rows=27 width=1029) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_118] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_128] (rows=1 width=0) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_126] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=8116 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Top N Key Operator [TNK_57] (rows=421645953 width=135) + keys:_col0, _col1, _col2,sort order:+++,top n:100 + Select Operator [SEL_27] (rows=421645953 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_101] (rows=421645953 width=135) + Conds:RS_24._col3=RS_128._col0(Inner),Output:["_col0","_col1","_col8","_col10","_col12"] + <-Map 14 [SIMPLE_EDGE] vectorized + 
SHUFFLE [RS_128] + PartitionCols:_col0 + Select Operator [SEL_127] (rows=1 width=0) + Output:["_col0","_col1"] + Filter Operator [FIL_126] (rows=1 width=0) + predicate:sm_ship_mode_sk is not null + TableScan [TS_12] (rows=1 width=0) + default@ship_mode,ship_mode,Tbl:PARTIAL,Col:NONE,Output:["sm_ship_mode_sk","sm_type"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_100] (rows=383314495 width=135) + Conds:RS_21._col4=RS_120._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] + PartitionCols:_col0 + Select Operator [SEL_119] (rows=27 width=1029) + Output:["_col0","_col1"] + Filter Operator [FIL_118] (rows=27 width=1029) + predicate:w_warehouse_sk is not null + TableScan [TS_9] (rows=27 width=1029) + default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_99] (rows=348467716 width=135) + Conds:RS_18._col2=RS_112._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col8"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + PartitionCols:_col0 + Select Operator [SEL_111] (rows=60 width=2045) + Output:["_col0","_col1"] + Filter Operator [FIL_110] (rows=60 width=2045) + predicate:cc_call_center_sk is not null + TableScan [TS_6] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_98] (rows=316788826 width=135) + Conds:RS_136._col1=RS_104._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_104] + PartitionCols:_col0 + Select Operator [SEL_103] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_102] (rows=8116 width=1119) + predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] + PartitionCols:_col1 + Select Operator [SEL_135] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_134] (rows=287989836 width=135) + predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (cs_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_22_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_22_warehouse_w_warehouse_sk_max) and in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_22_warehouse_w_warehouse_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_117] + Group By Operator [GBY_116] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_113] (rows=60 width=2045) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_111] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + Group By Operator [GBY_122] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_121] (rows=27 width=1029) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_119] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_129] (rows=1 width=0) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_127] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_105] (rows=8116 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_103] http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/tez/topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/topnkey.q.out b/ql/src/test/results/clientpositive/tez/topnkey.q.out new file mode 100644 index 0000000..66b9191 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/topnkey.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_10] + Limit [LIM_9] (rows=5 width=95) + Number of rows:5 + Select Operator [SEL_8] (rows=250 width=95) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_7] + Group By Operator [GBY_5] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_11] (rows=500 width=178) + keys:_col0,sort order:+,top n:5 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + Limit [LIM_8] (rows=5 width=87) + Number of rows:5 + Select Operator [SEL_7] (rows=250 width=87) + Output:["_col0"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_6] + Group By Operator [GBY_4] (rows=250 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + PartitionCols:_col0 + Group By Operator [GBY_2] (rows=250 width=87) + Output:["_col0"],keys:key + Top N Key Operator [TNK_10] (rows=500 width=87) + keys:key,sort order:+,top n:5 + Select Operator [SEL_1] (rows=500 width=87) + Output:["key"] + TableScan [TS_0] (rows=500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_13] + Limit [LIM_12] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_11] (rows=791 width=178) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + Select Operator [SEL_9] (rows=791 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_28] (rows=791 width=178) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_16] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_17] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out b/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out new file mode 100644 index 0000000..d6f7cc2 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_20] + Limit [LIM_19] (rows=5 width=95) + Number of rows:5 + Select Operator [SEL_18] (rows=250 width=95) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_17] + Group By Operator [GBY_16] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_13] (rows=500 width=178) + keys:_col0,sort order:+,top n:5 + Select Operator [SEL_12] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_19] + Limit [LIM_18] (rows=5 width=87) + Number of rows:5 + Select Operator [SEL_17] (rows=250 width=87) + Output:["_col0"] + <-Reducer 2 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_16] + Group By Operator [GBY_15] (rows=250 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=250 width=87) + Output:["_col0"],keys:key + Top N Key Operator [TNK_12] (rows=500 width=87) + keys:key,sort order:+,top n:5 + Select Operator [SEL_11] (rows=500 width=87) + Output:["key"] + TableScan [TS_0] (rows=500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_37] + Limit [LIM_36] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_35] (rows=791 width=178) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + Select Operator [SEL_9] (rows=791 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_28] (rows=791 width=178) + Conds:RS_31._col0=RS_34._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_31] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_29] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_34] + PartitionCols:_col0 + Select Operator [SEL_33] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_32] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/topnkey.q.out b/ql/src/test/results/clientpositive/topnkey.q.out new file mode 100644 index 0000000..31f3a70 --- /dev/null +++ b/ql/src/test/results/clientpositive/topnkey.q.out @@ -0,0 +1,301 @@ +PREHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: 
KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + 
Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 
(type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/vector_topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_topnkey.q.out b/ql/src/test/results/clientpositive/vector_topnkey.q.out new file mode 100644 index 0000000..ed829e2 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_topnkey.q.out @@ -0,0 +1,480 @@ +PREHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: 
key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string, 1:_col1:bigint] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution 
mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:string, _col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: 
false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string, 1:_col1:string] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:string, _col1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 9393fb8..044fd16 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -941,6 +941,25 @@ public 
final class ObjectInspectorUtils {
     return 0;
   }
 
+  public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2,
+      ObjectInspector[] oi2, boolean[] columnSortOrderIsDesc) {
+    assert (o1.length == oi1.length);
+    assert (o2.length == oi2.length);
+    assert (o1.length == o2.length);
+
+    for (int i = 0; i < o1.length; i++) {
+      int r = compare(o1[i], oi1[i], o2[i], oi2[i]);
+      if (r != 0) {
+        if (columnSortOrderIsDesc[i]) {
+          return r;
+        } else {
+          return -r;
+        }
+      }
+    }
+    return 0;
+  }
+
   /**
    * Whether comparison is supported for this type.
    * Currently all types that references any map are not comparable.
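The hunk above adds a sort-order-aware overload of ObjectInspectorUtils.compare: key columns are compared left to right with the existing single-value compare, and the first non-zero result is returned unchanged when that column is flagged descending, otherwise negated. Below is a minimal caller sketch; the row values, the string/int object inspectors, and the flag array are illustrative assumptions and are not taken from the commit.

// Illustrative use of the new overload (values and inspectors are assumptions,
// not part of the commit).
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class SortOrderCompareSketch {
  public static void main(String[] args) {
    // Two rows of (string, int) keys, both described by Java-object inspectors.
    Object[] row1 = new Object[] { "alpha", 10 };
    Object[] row2 = new Object[] { "alpha", 20 };
    ObjectInspector[] ois = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector };

    // Hypothetical ordering: column 0 ascending, column 1 descending.
    boolean[] columnSortOrderIsDesc = new boolean[] { false, true };

    // Column 0 ties, so column 1 decides; because it is flagged descending the
    // raw compare result (negative, since 10 < 20) is returned as-is.
    int cmp = ObjectInspectorUtils.compare(row1, ois, row2, ois, columnSortOrderIsDesc);
    System.out.println(cmp < 0); // true under these assumptions
  }
}

The flag array is indexed once per key column inside the loop, so callers need to supply at least as many flags as there are keys.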
