Repository: hive
Updated Branches:
  refs/heads/master-txnstats 174c6748f -> 651e79509
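The golden-file diffs below record the new Top N Key Operator (TNK_*) that now appears in these plans: the top-n keys implied by ORDER BY ... LIMIT are pushed below the Group By, so mappers forward only rows whose keys can still land in the final top n. A minimal HiveQL sketch of the kind of query these tests exercise follows; the SET line assumes the optimization is toggled by hive.optimize.topnkey, a property that does not appear in the diff text itself.

-- Sketch only: hive.optimize.topnkey is assumed to be the switch for the new operator.
SET hive.optimize.topnkey=true;

-- Same shape as the new topnkey.q test. With the optimization on, EXPLAIN shows a
-- Top N Key Operator (keys:_col0, sort order:+, top n:5) feeding the map-side
-- Group By Operator, so only rows whose key can still be among the smallest 5
-- keys are aggregated and shuffled.
EXPLAIN
SELECT key, SUM(CAST(SUBSTR(value,5) AS INT))
FROM src
GROUP BY key
ORDER BY key
LIMIT 5;
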
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/perf/tez/query82.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query82.q.out b/ql/src/test/results/clientpositive/perf/tez/query82.q.out index f1765e7..bb5a9e9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query82.q.out @@ -46,83 +46,85 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_96] - Limit [LIM_95] (rows=100 width=88) + File Output Operator [FS_97] + Limit [LIM_96] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_94] (rows=633595212 width=88) + Select Operator [SEL_95] (rows=633595212 width=88) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] - Group By Operator [GBY_92] (rows=633595212 width=88) + SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=633595212 width=88) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_21] (rows=1267190424 width=88) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Merge Join Operator [MERGEJOIN_72] (rows=1267190424 width=88) - Conds:RS_91._col0=RS_75._col0(Inner),RS_75._col0=RS_18._col1(Inner),Output:["_col2","_col3","_col4"] - <-Map 5 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_75] - PartitionCols:_col0 - Select Operator [SEL_74] (rows=25666 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_73] (rows=25666 width=1436) - predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] - <-Reducer 8 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_71] (rows=4593600 width=15) - Conds:RS_83._col0=RS_86._col0(Inner),Output:["_col1"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_86] - PartitionCols:_col0 - Select Operator [SEL_85] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_84] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_83] - PartitionCols:_col0 - Select Operator [SEL_82] (rows=4176000 width=15) - Output:["_col0","_col1"] - Filter Operator [FIL_81] (rows=4176000 width=15) - predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) - TableScan [TS_6] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_91] - PartitionCols:_col0 - Select Operator [SEL_90] (rows=575995635 width=88) - Output:["_col0"] - Filter Operator [FIL_89] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN 
DynamicValue(RS_18_inventory_inv_item_sk_min) AND DynamicValue(RS_18_inventory_inv_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_inventory_inv_item_sk_bloom_filter))) and ss_item_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST [RS_80] - Group By Operator [GBY_79] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_78] - Group By Operator [GBY_77] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_76] (rows=25666 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_74] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_88] - Group By Operator [GBY_87] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4593600)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_50] - Group By Operator [GBY_49] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4593600)"] - Select Operator [SEL_48] (rows=4593600 width=15) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_71] + Top N Key Operator [TNK_43] (rows=1267190424 width=88) + keys:_col2, _col3, _col4,sort order:+++,top n:100 + Merge Join Operator [MERGEJOIN_73] (rows=1267190424 width=88) + Conds:RS_92._col0=RS_76._col0(Inner),RS_76._col0=RS_18._col1(Inner),Output:["_col2","_col3","_col4"] + <-Map 5 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_76] + PartitionCols:_col0 + Select Operator [SEL_75] (rows=25666 width=1436) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_74] (rows=25666 width=1436) + predicate:((i_manufact_id) IN (437, 129, 727, 663) and i_current_price BETWEEN 30 AND 60 and i_item_sk is not null) + TableScan [TS_3] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] + <-Reducer 8 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_72] (rows=4593600 width=15) + Conds:RS_84._col0=RS_87._col0(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_87] + PartitionCols:_col0 + Select Operator [SEL_86] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_85] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) + TableScan [TS_9] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=4176000 width=15) + Output:["_col0","_col1"] + Filter Operator [FIL_82] (rows=4176000 width=15) + predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) + TableScan [TS_6] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE 
[RS_92] + PartitionCols:_col0 + Select Operator [SEL_91] (rows=575995635 width=88) + Output:["_col0"] + Filter Operator [FIL_90] (rows=575995635 width=88) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_18_inventory_inv_item_sk_min) AND DynamicValue(RS_18_inventory_inv_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_inventory_inv_item_sk_bloom_filter))) and ss_item_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk"] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_81] + Group By Operator [GBY_80] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_79] + Group By Operator [GBY_78] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_77] (rows=25666 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_75] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4593600)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_51] + Group By Operator [GBY_50] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4593600)"] + Select Operator [SEL_49] (rows=4593600 width=15) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_72] http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/perf/tez/query99.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/query99.q.out index b0f6a45..456fd8c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -86,128 +86,130 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_141] - Limit [LIM_140] (rows=100 width=135) + File Output Operator [FS_142] + Limit [LIM_141] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_139] (rows=210822976 width=135) + Select Operator [SEL_140] (rows=210822976 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - Select Operator [SEL_137] (rows=210822976 width=135) + SHUFFLE [RS_139] + Select Operator [SEL_138] (rows=210822976 width=135) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_136] (rows=210822976 width=135) + Group By Operator [GBY_137] (rows=210822976 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_29] 
(rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0, _col1, _col2 - Select Operator [SEL_27] (rows=421645953 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_100] (rows=421645953 width=135) - Conds:RS_24._col3=RS_127._col0(Inner),Output:["_col0","_col1","_col8","_col10","_col12"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_127] - PartitionCols:_col0 - Select Operator [SEL_126] (rows=1 width=0) - Output:["_col0","_col1"] - Filter Operator [FIL_125] (rows=1 width=0) - predicate:sm_ship_mode_sk is not null - TableScan [TS_12] (rows=1 width=0) - default@ship_mode,ship_mode,Tbl:PARTIAL,Col:NONE,Output:["sm_ship_mode_sk","sm_type"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_99] (rows=383314495 width=135) - Conds:RS_21._col4=RS_119._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] - PartitionCols:_col0 - Select Operator [SEL_118] (rows=27 width=1029) - Output:["_col0","_col1"] - Filter Operator [FIL_117] (rows=27 width=1029) - predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_98] (rows=348467716 width=135) - Conds:RS_18._col2=RS_111._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col8"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] - PartitionCols:_col0 - Select Operator [SEL_110] (rows=60 width=2045) - Output:["_col0","_col1"] - Filter Operator [FIL_109] (rows=60 width=2045) - predicate:cc_call_center_sk is not null - TableScan [TS_6] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_97] (rows=316788826 width=135) - Conds:RS_135._col1=RS_103._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] - PartitionCols:_col0 - Select Operator [SEL_102] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] - PartitionCols:_col1 - Select Operator [SEL_134] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_133] (rows=287989836 width=135) - predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (cs_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and 
in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_22_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_22_warehouse_w_warehouse_sk_max) and in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_22_warehouse_w_warehouse_sk_bloom_filter))) and cs_call_center_sk is not n ull and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_116] - Group By Operator [GBY_115] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] - Group By Operator [GBY_113] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_112] (rows=60 width=2045) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_110] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_124] - Group By Operator [GBY_123] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - Group By Operator [GBY_121] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_120] (rows=27 width=1029) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_118] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_128] (rows=1 width=0) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_126] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=8116 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Top N Key Operator [TNK_57] (rows=421645953 width=135) + keys:_col0, _col1, _col2,sort order:+++,top n:100 + Select Operator [SEL_27] (rows=421645953 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_101] (rows=421645953 width=135) + Conds:RS_24._col3=RS_128._col0(Inner),Output:["_col0","_col1","_col8","_col10","_col12"] + <-Map 14 [SIMPLE_EDGE] vectorized + 
SHUFFLE [RS_128] + PartitionCols:_col0 + Select Operator [SEL_127] (rows=1 width=0) + Output:["_col0","_col1"] + Filter Operator [FIL_126] (rows=1 width=0) + predicate:sm_ship_mode_sk is not null + TableScan [TS_12] (rows=1 width=0) + default@ship_mode,ship_mode,Tbl:PARTIAL,Col:NONE,Output:["sm_ship_mode_sk","sm_type"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_100] (rows=383314495 width=135) + Conds:RS_21._col4=RS_120._col0(Inner),Output:["_col0","_col1","_col3","_col8","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] + PartitionCols:_col0 + Select Operator [SEL_119] (rows=27 width=1029) + Output:["_col0","_col1"] + Filter Operator [FIL_118] (rows=27 width=1029) + predicate:w_warehouse_sk is not null + TableScan [TS_9] (rows=27 width=1029) + default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_99] (rows=348467716 width=135) + Conds:RS_18._col2=RS_112._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col8"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + PartitionCols:_col0 + Select Operator [SEL_111] (rows=60 width=2045) + Output:["_col0","_col1"] + Filter Operator [FIL_110] (rows=60 width=2045) + predicate:cc_call_center_sk is not null + TableScan [TS_6] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_98] (rows=316788826 width=135) + Conds:RS_136._col1=RS_104._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_104] + PartitionCols:_col0 + Select Operator [SEL_103] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_102] (rows=8116 width=1119) + predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] + PartitionCols:_col1 + Select Operator [SEL_135] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_134] (rows=287989836 width=135) + predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (cs_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_22_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_22_warehouse_w_warehouse_sk_max) and in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_22_warehouse_w_warehouse_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_warehouse_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_117] + Group By Operator [GBY_116] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_113] (rows=60 width=2045) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_111] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + Group By Operator [GBY_122] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_121] (rows=27 width=1029) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_119] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_129] (rows=1 width=0) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_127] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_105] (rows=8116 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_103] http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/tez/topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/topnkey.q.out b/ql/src/test/results/clientpositive/tez/topnkey.q.out new file mode 100644 index 0000000..66b9191 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/topnkey.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_10] + Limit [LIM_9] (rows=5 width=95) + Number of rows:5 + Select Operator [SEL_8] (rows=250 width=95) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_7] + Group By Operator [GBY_5] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator [GBY_3] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_11] (rows=500 width=178) + keys:_col0,sort order:+,top n:5 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + Limit [LIM_8] (rows=5 width=87) + Number of rows:5 + Select Operator [SEL_7] (rows=250 width=87) + Output:["_col0"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_6] + Group By Operator [GBY_4] (rows=250 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + PartitionCols:_col0 + Group By Operator [GBY_2] (rows=250 width=87) + Output:["_col0"],keys:key + Top N Key Operator [TNK_10] (rows=500 width=87) + keys:key,sort order:+,top n:5 + Select Operator [SEL_1] (rows=500 width=87) + Output:["key"] + TableScan [TS_0] (rows=500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_13] + Limit [LIM_12] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_11] (rows=791 width=178) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + Select Operator [SEL_9] (rows=791 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_28] (rows=791 width=178) + Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_16] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_17] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out b/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out new file mode 100644 index 0000000..d6f7cc2 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_topnkey.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_20] + Limit [LIM_19] (rows=5 width=95) + Number of rows:5 + Select Operator [SEL_18] (rows=250 width=95) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_17] + Group By Operator [GBY_16] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_13] (rows=500 width=178) + keys:_col0,sort order:+,top n:5 + Select Operator [SEL_12] (rows=500 width=178) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_19] + Limit [LIM_18] (rows=5 width=87) + Number of rows:5 + Select Operator [SEL_17] (rows=250 width=87) + Output:["_col0"] + <-Reducer 2 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_16] + Group By Operator [GBY_15] (rows=250 width=87) + Output:["_col0"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=250 width=87) + Output:["_col0"],keys:key + Top N Key Operator [TNK_12] (rows=500 width=87) + keys:key,sort order:+,top n:5 + Select Operator [SEL_11] (rows=500 width=87) + Output:["key"] + TableScan [TS_0] (rows=500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_37] + Limit [LIM_36] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_35] (rows=791 width=178) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + Select Operator [SEL_9] (rows=791 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_28] (rows=791 width=178) + Conds:RS_31._col0=RS_34._col0(Inner),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_31] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_29] (rows=500 width=87) + predicate:key is not null + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_34] + PartitionCols:_col0 + Select Operator [SEL_33] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_32] (rows=500 width=178) + predicate:key is not null + TableScan [TS_3] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/topnkey.q.out b/ql/src/test/results/clientpositive/topnkey.q.out new file mode 100644 index 0000000..31f3a70 --- /dev/null +++ b/ql/src/test/results/clientpositive/topnkey.q.out @@ -0,0 +1,301 @@ +PREHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: 
KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + 
Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 
(type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/vector_topnkey.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_topnkey.q.out b/ql/src/test/results/clientpositive/vector_topnkey.q.out new file mode 100644 index 0000000..ed829e2 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_topnkey.q.out @@ -0,0 +1,480 @@ +PREHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: 
key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string, 1:_col1:bigint] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution 
mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:string, _col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: 
false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string, 1:_col1:string] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:string, _col1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 9393fb8..044fd16 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -941,6 +941,25 @@ public 
final class ObjectInspectorUtils {
     return 0;
   }
 
+  public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2,
+      ObjectInspector[] oi2, boolean[] columnSortOrderIsDesc) {
+    assert (o1.length == oi1.length);
+    assert (o2.length == oi2.length);
+    assert (o1.length == o2.length);
+
+    for (int i = 0; i < o1.length; i++) {
+      int r = compare(o1[i], oi1[i], o2[i], oi2[i]);
+      if (r != 0) {
+        if (columnSortOrderIsDesc[i]) {
+          return r;
+        } else {
+          return -r;
+        }
+      }
+    }
+    return 0;
+  }
+
   /**
    * Whether comparison is supported for this type.
    * Currently all types that references any map are not comparable.
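The hunk above adds a sort-order-aware overload of ObjectInspectorUtils.compare: key columns are compared left to right with the existing single-value compare, and the first non-zero result is returned unchanged when that column is flagged descending, otherwise negated. Below is a minimal caller sketch; the row values, the string/int object inspectors, and the flag array are illustrative assumptions and are not taken from the commit.

// Illustrative use of the new overload (values and inspectors are assumptions,
// not part of the commit).
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class SortOrderCompareSketch {
  public static void main(String[] args) {
    // Two rows of (string, int) keys, both described by Java-object inspectors.
    Object[] row1 = new Object[] { "alpha", 10 };
    Object[] row2 = new Object[] { "alpha", 20 };
    ObjectInspector[] ois = new ObjectInspector[] {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector };

    // Hypothetical ordering: column 0 ascending, column 1 descending.
    boolean[] columnSortOrderIsDesc = new boolean[] { false, true };

    // Column 0 ties, so column 1 decides; because it is flagged descending the
    // raw compare result (negative, since 10 < 20) is returned as-is.
    int cmp = ObjectInspectorUtils.compare(row1, ois, row2, ois, columnSortOrderIsDesc);
    System.out.println(cmp < 0); // true under these assumptions
  }
}

The flag array is indexed once per key column inside the loop, so callers need to supply at least as many flags as there are keys.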
