conversions (Matt McCline, reviewed by Teddy Choi)

mmccline Wed, 08 Aug 2018 00:38:09 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query70.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query70.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query70.q.out
index 4222b52..b719cdb 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query70.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query70.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select  
     sum(ss_net_profit) as total_sum
    ,s_state
@@ -35,7 +35,7 @@ select
   ,rank_within_parent
  limit 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select  
     sum(ss_net_profit) as total_sum
    ,s_state
@@ -72,6 +72,10 @@ select
   ,rank_within_parent
  limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
@@ -89,18 +93,40 @@ STAGE PLANS:
                   alias: store
                   filterExpr: (s_state is not null and s_store_sk is not null) 
(type: boolean)
                   Statistics: Num rows: 1704 Data size: 3256276 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 24:string), SelectColumnIsNotNull(col 0:int))
                     predicate: (s_state is not null and s_store_sk is not 
null) (type: boolean)
                     Statistics: Num rows: 1704 Data size: 3256276 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: s_store_sk (type: int), s_county (type: 
string), s_state (type: string)
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 23, 24]
                       Statistics: Num rows: 1704 Data size: 3256276 Basic 
stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
                         keys:
                           0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Local Work:
               Map Reduce Local Work
 
@@ -114,18 +140,40 @@ STAGE PLANS:
                   alias: store
                   filterExpr: (s_store_sk is not null and s_state is not null) 
(type: boolean)
                   Statistics: Num rows: 1704 Data size: 3256276 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 24:string))
                     predicate: (s_state is not null and s_store_sk is not 
null) (type: boolean)
                     Statistics: Num rows: 1704 Data size: 3256276 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: s_store_sk (type: int), s_state (type: 
string)
                       outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 24]
                       Statistics: Num rows: 1704 Data size: 3256276 Basic 
stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
+                        Spark Hash Table Sink Vectorization:
+                            className: VectorSparkHashTableSinkOperator
+                            native: true
                         keys:
                           0 _col1 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Local Work:
               Map Reduce Local Work
 
@@ -148,81 +196,178 @@ STAGE PLANS:
                   alias: store_sales
                   filterExpr: (ss_sold_date_sk is not null and ss_store_sk is 
not null) (type: boolean)
                   Statistics: Num rows: 575995635 Data size: 50814502088 Basic 
stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int))
                     predicate: (ss_sold_date_sk is not null and ss_store_sk is 
not null) (type: boolean)
                     Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ss_sold_date_sk (type: int), ss_store_sk 
(type: int), ss_net_profit (type: decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 7, 22]
                       Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
decimal(7,2))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 13 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
                   filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColumnBetween(col 3:int, left 1212, right 1223), 
SelectColumnIsNotNull(col 0:int))
                     predicate: (d_date_sk is not null and d_month_seq BETWEEN 
1212 AND 1223) (type: boolean)
                     Statistics: Num rows: 8116 Data size: 9081804 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 8116 Data size: 9081804 Basic 
stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 8116 Data size: 9081804 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 7 
             Map Operator Tree:
                 TableScan
                   alias: d1
                   filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColumnBetween(col 3:int, left 1212, right 1223), 
SelectColumnIsNotNull(col 0:int))
                     predicate: (d_date_sk is not null and d_month_seq BETWEEN 
1212 AND 1223) (type: boolean)
                     Statistics: Num rows: 8116 Data size: 9081804 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 8116 Data size: 9081804 Basic 
stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 8116 Data size: 9081804 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 9 
             Map Operator Tree:
                 TableScan
                   alias: store_sales
                   filterExpr: (ss_store_sk is not null and ss_sold_date_sk is 
not null) (type: boolean)
                   Statistics: Num rows: 575995635 Data size: 50814502088 Basic 
stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col 0:int))
                     predicate: (ss_sold_date_sk is not null and ss_store_sk is 
not null) (type: boolean)
                     Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ss_sold_date_sk (type: int), ss_store_sk 
(type: int), ss_net_profit (type: decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 7, 22]
                       Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
decimal(7,2))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 10 
             Local Work:
               Map Reduce Local Work
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -256,9 +401,23 @@ STAGE PLANS:
                       value expressions: _col1 (type: decimal(17,2))
         Reducer 11 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> 
decimal(17,2)
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
@@ -267,14 +426,29 @@ STAGE PLANS:
                   key expressions: _col0 (type: string), _col1 (type: 
decimal(17,2))
                   sort order: +-
                   Map-reduce partition columns: _col0 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: No PTF TopN IS false
                   Statistics: Num rows: 348477374 Data size: 30742775095 Basic 
stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
         Reducer 12 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: decimal(17,2))
                 outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
                 Statistics: Num rows: 348477374 Data size: 30742775095 Basic 
stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -296,22 +470,47 @@ STAGE PLANS:
                               window function: GenericUDAFRankEvaluator
                               window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                               isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 1:decimal(17,2)]
+                      functionNames: [rank]
+                      native: true
+                      orderExpressions: [col 1:decimal(17,2)]
+                      partitionExpressions: [col 0:string]
                   Statistics: Num rows: 348477374 Data size: 30742775095 Basic 
stats: COMPLETE Column stats: NONE
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: 
FilterLongColLessEqualLongScalar(col 2:int, val 5)
                     predicate: (rank_window_0 <= 5) (type: boolean)
                     Statistics: Num rows: 116159124 Data size: 10247591639 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col0 (type: string)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 116159124 Data size: 10247591639 
Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 116159124 Data size: 10247591639 
Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Local Work:
               Map Reduce Local Work
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -338,6 +537,11 @@ STAGE PLANS:
                     Statistics: Num rows: 696954748 Data size: 61485550191 
Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col2 (type: decimal(7,2)), _col6 
(type: string)
         Reducer 3 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -365,9 +569,23 @@ STAGE PLANS:
                       value expressions: _col3 (type: decimal(17,2))
         Reducer 4 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDecimal(col 3:decimal(17,2)) -> 
decimal(17,2)
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: bigint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -375,19 +593,38 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: decimal(17,2)), _col2 (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 3, 2]
                   Statistics: Num rows: 1149975358 Data size: 101451159969 
Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: (grouping(_col3, 1) + grouping(_col3, 0)) 
(type: bigint), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( 
null AS STRING)) END (type: string), _col2 (type: decimal(17,2))
                     sort order: ++-
                     Map-reduce partition columns: (grouping(_col3, 1) + 
grouping(_col3, 0)) (type: bigint), CASE WHEN ((grouping(_col3, 0) = 0)) THEN 
(_col0) ELSE (CAST( null AS STRING)) END (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        keyExpressions: LongColAddLongColumn(col 4:bigint, col 
5:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 4:bigint, 
VectorUDFAdaptor(grouping(_col3, 0)) -> 5:bigint) -> 6:bigint, 
IfExprColumnNull(col 5:boolean, col 0:string, null)(children: 
LongColEqualLongScalar(col 4:bigint, val 0)(children: 
VectorUDFAdaptor(grouping(_col3, 0)) -> 4:bigint) -> 5:boolean, col 0:string) 
-> 7:string
+                        native: true
+                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 1149975358 Data size: 101451159969 
Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: bigint)
         Reducer 5 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: true
+                usesVectorUDFAdaptor: true
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY.reducesinkkey2 (type: decimal(17,2)), VALUE._col2 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [3, 4, 2, 5]
                 Statistics: Num rows: 1149975358 Data size: 101451159969 Basic 
stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
@@ -409,29 +646,62 @@ STAGE PLANS:
                               window function: GenericUDAFRankEvaluator
                               window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
                               isPivotResult: true
+                  PTF Vectorization:
+                      className: VectorPTFOperator
+                      evaluatorClasses: [VectorPTFEvaluatorRank]
+                      functionInputExpressions: [col 2:decimal(17,2)]
+                      functionNames: [rank]
+                      native: true
+                      orderExpressions: [col 2:decimal(17,2)]
+                      partitionExpressions: [LongColAddLongColumn(col 
7:bigint, col 8:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 
7:bigint, VectorUDFAdaptor(grouping(_col3, 0)) -> 8:bigint) -> 9:bigint, 
IfExprColumnNull(col 8:boolean, col 3:string, null)(children: 
LongColEqualLongScalar(col 7:bigint, val 0)(children: 
VectorUDFAdaptor(grouping(_col3, 0)) -> 7:bigint) -> 8:boolean, col 3:string) 
-> 10:string]
                   Statistics: Num rows: 1149975358 Data size: 101451159969 
Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col2 (type: decimal(17,2)), _col0 (type: 
string), _col1 (type: string), (grouping(_col3, 1) + grouping(_col3, 0)) (type: 
bigint), rank_window_0 (type: int), CASE WHEN (((grouping(_col3, 1) + 
grouping(_col3, 0)) = 0)) THEN (_col0) ELSE (null) END (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [2, 3, 4, 12, 6, 14]
+                        selectExpressions: LongColAddLongColumn(col 7:bigint, 
col 11:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 7:bigint, 
VectorUDFAdaptor(grouping(_col3, 0)) -> 11:bigint) -> 12:bigint, 
IfExprColumnNull(col 7:boolean, col 3:string, null)(children: 
LongColEqualLongScalar(col 13:bigint, val 0)(children: LongColAddLongColumn(col 
7:bigint, col 11:bigint)(children: VectorUDFAdaptor(grouping(_col3, 1)) -> 
7:bigint, VectorUDFAdaptor(grouping(_col3, 0)) -> 11:bigint) -> 13:bigint) -> 
7:boolean, col 3:string) -> 14:string
                     Statistics: Num rows: 1149975358 Data size: 101451159969 
Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col3 (type: bigint), _col5 (type: 
string), _col4 (type: int)
                       sort order: -++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 1149975358 Data size: 101451159969 
Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.1
                       value expressions: _col0 (type: decimal(17,2)), _col1 
(type: string), _col2 (type: string)
         Reducer 6 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: decimal(17,2)), VALUE._col1 
(type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: bigint), 
KEY.reducesinkkey2 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [3, 4, 5, 0, 2]
                 Statistics: Num rows: 1149975358 Data size: 101451159969 Basic 
stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 100
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
                   Statistics: Num rows: 100 Data size: 8800 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 100 Data size: 8800 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat


http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query71.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query71.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
index eded78c..2ec7b12 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query71.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query71.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select i_brand_id brand_id, i_brand brand,t_hour,t_minute,
        sum(ext_price) ext_price
  from item, (select ws_ext_sales_price as ext_price, 
@@ -36,7 +36,7 @@ select i_brand_id brand_id, i_brand brand,t_hour,t_minute,
  group by i_brand, i_brand_id,t_hour,t_minute
  order by ext_price desc, i_brand_id
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select i_brand_id brand_id, i_brand brand,t_hour,t_minute,
        sum(ext_price) ext_price
  from item, (select ws_ext_sales_price as ext_price, 
@@ -74,6 +74,10 @@ select i_brand_id brand_id, i_brand brand,t_hour,t_minute,
  group by i_brand, i_brand_id,t_hour,t_minute
  order by ext_price desc, i_brand_id
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -97,139 +101,305 @@ STAGE PLANS:
                   alias: web_sales
                   filterExpr: (ws_sold_date_sk is not null and ws_item_sk is 
not null and ws_sold_time_sk is not null) (type: boolean)
                   Statistics: Num rows: 144002668 Data size: 19580198212 Basic 
stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int), 
SelectColumnIsNotNull(col 1:int))
                     predicate: (ws_item_sk is not null and ws_sold_date_sk is 
not null and ws_sold_time_sk is not null) (type: boolean)
                     Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ws_sold_date_sk (type: int), 
ws_sold_time_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: 
decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2, _col3
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 3, 23]
                       Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: decimal(7,2))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 10 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
                   filterExpr: ((d_moy = 12) and (d_year = 2001) and d_date_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColEqualLongScalar(col 8:int, val 12), 
FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 
0:int))
                     predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk 
is not null) (type: boolean)
                     Statistics: Num rows: 18262 Data size: 20435178 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 18262 Data size: 20435178 Basic 
stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 18262 Data size: 20435178 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 11 
             Map Operator Tree:
                 TableScan
                   alias: store_sales
                   filterExpr: (ss_sold_date_sk is not null and ss_item_sk is 
not null and ss_sold_time_sk is not null) (type: boolean)
                   Statistics: Num rows: 575995635 Data size: 50814502088 Basic 
stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 2:int), 
SelectColumnIsNotNull(col 1:int))
                     predicate: (ss_item_sk is not null and ss_sold_date_sk is 
not null and ss_sold_time_sk is not null) (type: boolean)
                     Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ss_sold_date_sk (type: int), 
ss_sold_time_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: 
decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2, _col3
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 2, 15]
                       Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: decimal(7,2))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 13 
             Map Operator Tree:
                 TableScan
                   alias: date_dim
                   filterExpr: ((d_moy = 12) and (d_year = 2001) and d_date_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColEqualLongScalar(col 8:int, val 12), 
FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 
0:int))
                     predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk 
is not null) (type: boolean)
                     Statistics: Num rows: 18262 Data size: 20435178 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int)
                       outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
                       Statistics: Num rows: 18262 Data size: 20435178 Basic 
stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 18262 Data size: 20435178 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 14 
             Map Operator Tree:
                 TableScan
                   alias: item
                   filterExpr: ((i_manager_id = 1) and i_item_sk is not null) 
(type: boolean)
                   Statistics: Num rows: 462000 Data size: 663560457 Basic 
stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColEqualLongScalar(col 20:int, val 1), SelectColumnIsNotNull(col 
0:int))
                     predicate: ((i_manager_id = 1) and i_item_sk is not null) 
(type: boolean)
                     Statistics: Num rows: 231000 Data size: 331780228 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: i_item_sk (type: int), i_brand_id (type: 
int), i_brand (type: string)
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 7, 8]
                       Statistics: Num rows: 231000 Data size: 331780228 Basic 
stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 231000 Data size: 331780228 
Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
string)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 15 
             Map Operator Tree:
                 TableScan
                   alias: time_dim
                   filterExpr: ((t_meal_time) IN ('breakfast', 'dinner') and 
t_time_sk is not null) (type: boolean)
                   Statistics: Num rows: 86400 Data size: 40694400 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
FilterStringColumnInList(col 9, values breakfast, dinner), 
SelectColumnIsNotNull(col 0:int))
                     predicate: ((t_meal_time) IN ('breakfast', 'dinner') and 
t_time_sk is not null) (type: boolean)
                     Statistics: Num rows: 86400 Data size: 40694400 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: t_time_sk (type: int), t_hour (type: int), 
t_minute (type: int)
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 3, 4]
                       Statistics: Num rows: 86400 Data size: 40694400 Basic 
stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 86400 Data size: 40694400 Basic 
stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: int)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 8 
             Map Operator Tree:
                 TableScan
                   alias: catalog_sales
                   filterExpr: (cs_sold_date_sk is not null and cs_item_sk is 
not null and cs_sold_time_sk is not null) (type: boolean)
                   Statistics: Num rows: 287989836 Data size: 38999608952 Basic 
stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 15:int), 
SelectColumnIsNotNull(col 1:int))
                     predicate: (cs_item_sk is not null and cs_sold_date_sk is 
not null and cs_sold_time_sk is not null) (type: boolean)
                     Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cs_sold_date_sk (type: int), 
cs_sold_time_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: 
decimal(7,2))
                       outputColumnNames: _col0, _col1, _col2, _col3
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1, 15, 23]
                       Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
int), _col3 (type: decimal(7,2))
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 12 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -247,6 +417,11 @@ STAGE PLANS:
                     Statistics: Num rows: 1108786976 Data size: 120333742785 
Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: decimal(7,2)), _col2 
(type: int)
         Reducer 2 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -264,6 +439,11 @@ STAGE PLANS:
                     Statistics: Num rows: 1108786976 Data size: 120333742785 
Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: decimal(7,2)), _col2 
(type: int)
         Reducer 3 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -280,6 +460,11 @@ STAGE PLANS:
                   Statistics: Num rows: 1219665700 Data size: 132367119932 
Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: decimal(7,2)), _col4 (type: 
int), _col5 (type: string)
         Reducer 4 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -303,9 +488,23 @@ STAGE PLANS:
                     value expressions: _col4 (type: decimal(17,2))
         Reducer 5 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDecimal(col 4:decimal(17,2)) -> 
decimal(17,2)
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int, col 1:int, col 2:int, col 
3:string
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 
(type: int), KEY._col3 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -313,27 +512,53 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col3 (type: string), _col1 (type: int), _col2 
(type: int), _col4 (type: decimal(17,2)), _col0 (type: int)
                   outputColumnNames: _col1, _col2, _col3, _col4, _col5
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [3, 1, 2, 4, 0]
                   Statistics: Num rows: 670816149 Data size: 72801917486 Basic 
stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col4 (type: decimal(17,2)), _col5 (type: 
int)
                     sort order: -+
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        native: true
+                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 670816149 Data size: 72801917486 
Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: int)
         Reducer 6 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 
(type: string), VALUE._col1 (type: int), VALUE._col2 (type: int), 
KEY.reducesinkkey0 (type: decimal(17,2))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 2, 3, 4, 0]
                 Statistics: Num rows: 670816149 Data size: 72801917486 Basic 
stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 670816149 Data size: 72801917486 Basic 
stats: COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 9 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Tagging not supported
+                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:

[13/51] [partial] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)

Reply via email to