[1/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

mmccline Tue, 20 Feb 2018 22:35:01 -0800

Repository: hive
Updated Branches:
  refs/heads/master 3df6bc28b -> 8975924ec



http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 6b63764..9683efa 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -140,40 +140,23 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), 
avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
-                Group By Vectorization:
-                    aggregators: VectorUDAFAvgFinal(col 
0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 
1:double) -> double, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) 
-> double, VectorUDAFVarFinal(col 
6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, 
VectorUDAFCountMerge(col 8:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
                 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: double), (_col0 + -3728.0) (type: 
double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: 
double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 
(type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 
+ -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), 
_col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + 
-3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) 
(type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) 
(type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) 
(type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), 
(_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), 
(UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) 
(type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 
15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
-                      selectExpressions: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 
10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: 
DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, 
DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: 
DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 
12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, 
val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 
12:double, DoubleColMultiplyDoubleColumn(col 16:double, col 
13:double)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 
15:double)(children: DoubleColUnaryMinus(col 15:double)(children: 
DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 
0:double, val 
-3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 
0:double, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 
15:double)(children: DoubleColUnaryMinus(col 13:double)(children: 
DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) 
-> 13:double) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 13:double, 
DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: 
DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 
16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 
16:double) -> 17:double) -> 16:double) -> 17:double, 
DoubleColMultiplyDoubleColumn(col 18:double, col 2:double)(children: 
DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: 
DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 
16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) 
 -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, 
DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 18:double, 
DoubleColUnaryMinus(col 19:double)(children: 
DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 19:double) -> 
20:double, DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: 
DoubleColUnaryMinus(col 2:double) -> 19:double) -> 21:double, 
DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideDoubleScalar(col 
19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 
19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 
0:double, col 1:double) -> 22:double, DoubleColDivideDoubleColumn(col 
23:double, col 25:double)(children: CastLongToDouble(col 7:tinyint) -> 
23:double, DoubleColDivideDoubleScalar(col 24:double, val -563.0)(children: 
DoubleColUnaryMinus(col 2:double) -> 24:double) -> 25:double) -> 24:double, 
DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleColumn(col 
0:double, col 1:double) -> 23:double) -> 25:double
                   Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 492 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -399,40 +382,23 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFVarianceEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), var_pop(VALUE._col1), 
stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), 
min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMaxLong(col 0:int) -> int, 
VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: var_pop, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFMaxDouble(col 3:double) -> double, VectorUDAFAvgFinal(col 
4:struct<count:bigint,sum:double,input:tinyint>) -> double, 
VectorUDAFMinLong(col 5:int) -> int, VectorUDAFMinDouble(col 6:double) -> 
double, VectorUDAFVarFinal(col 
7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
                 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), (UDFToDouble(_col0) / 
-3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- 
(_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) 
(type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 
(type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 
(type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), 
(- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: 
double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: 
double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), 
((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) 
(type: int), _col8 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 10, 11, 1, 13, 2, 14, 9, 
15, 3, 4, 16, 5, 19, 17, 6, 18, 7, 20, 12, 21, 23, 8]
-                      selectExpressions: DoubleColDivideDoubleScalar(col 
9:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 9:double) -> 
10:double, LongColMultiplyLongScalar(col 0:int, val -3728) -> 11:int, 
LongColUnaryMinus(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, 
val -3728) -> 12:int) -> 13:int, LongScalarModuloLongColumn(val -563, col 
12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 
14:int, DoubleColDivideDoubleColumn(col 1:double, col 2:double) -> 9:double, 
DoubleColUnaryMinus(col 2:double) -> 15:double, 
DoubleColSubtractDoubleScalar(col 2:double, val 10.175) -> 16:double, 
DoubleColModuloDoubleColumn(col 17:double, col 18:double)(children: 
CastLongToDouble(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val 
-3728) -> 12:int) -> 17:double, DoubleColSubtractDoubleScalar(col 2:double, val 
10.175) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 3:double) -> 
17:double, DoubleColModuloDoubleScalar(col 3:double, val -26.28) -> 18:double, 
DoubleColUnaryMinus(col 21:double)(children: 
DoubleColDivideDoubleScalar(col 20:double, val -3728.0)(children: 
CastLongToDouble(col 0:int) -> 20:double) -> 21:double) -> 20:double, 
LongColModuloLongColumn(col 22:int, col 23:int)(children: LongColUnaryMinus(col 
12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 
22:int, LongScalarModuloLongColumn(val -563, col 12:int)(children: 
LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 23:int) -> 
12:int, DoubleColSubtractDoubleColumn(col 24:double, col 4:double)(children: 
DoubleColDivideDoubleScalar(col 21:double, val -3728.0)(children: 
CastLongToDouble(col 0:int) -> 21:double) -> 24:double) -> 21:double, 
LongColUnaryMinus(col 22:int)(children: LongColMultiplyLongScalar(col 0:int, 
val -3728) -> 22:int) -> 23:int
                   Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 420 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -515,7 +481,7 @@ WHERE  (((cbigint <= 197)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--20301111      5445.576984978541       -1626869520     7.9684972882908944E16   
1626869520      NULL    -563    NULL    NULL    NULL    -8.935323383084578      
NULL    -1069736047     NULL    NULL    NULL    NULL    NULL    
-5445.576984978541      -58     5454.512308361625       1626869520      
7.2647256545687792E16
+-20301111      5445.576984978541       -1626869520     7.9684972882908944E16   
1626869520      NULL    -563    NULL    NULL    NULL    -8.935323383084578      
NULL    -1069736047     NULL    NULL    NULL    NULL    NULL    
-5445.576984978541      511     5454.512308361625       1626869520      
7.2647256545687792E16
 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT VAR_POP(cbigint),
        (-(VAR_POP(cbigint))),
@@ -650,40 +616,23 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFVarianceEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: var_pop(VALUE._col0), count(VALUE._col1), 
max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), 
stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 
0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxLong(col 
2:tinyint) -> tinyint, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFMaxLong(col 4:int) -> int, VectorUDAFVarFinal(col 
5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFCountMerge(col 6:bigint) -> bigint, 
VectorUDAFAvgFinal(col 7:struct<count:bigint,sum:double,input:tinyint>) -> 
double
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
                 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: double), (- _col0) (type: double), 
(_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS 
decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), 
(UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), 
(-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: 
bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- 
_col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- 
_col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 
(type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 
(type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 8, 10, 1, 12, 2, 14, 13, 
15, 1, 16, 3, 9, 19, 4, 18, 22, 5, 23, 6, 7, 24]
-                      selectExpressions: DoubleColUnaryMinus(col 0:double) -> 
8:double, DoubleColSubtractDoubleColumn(col 0:double, col 9:double)(children: 
DoubleColUnaryMinus(col 0:double) -> 9:double) -> 10:double, 
DecimalColModuloDecimalScalar(col 11:decimal(19,0), val 79.553)(children: 
CastLongToDecimal(col 1:bigint) -> 11:decimal(19,0)) -> 12:decimal(5,3), 
DoubleColSubtractDoubleColumn(col 9:double, col 13:double)(children: 
CastLongToDouble(col 1:bigint) -> 9:double, DoubleColUnaryMinus(col 0:double) 
-> 13:double) -> 14:double, DoubleColUnaryMinus(col 9:double)(children: 
DoubleColUnaryMinus(col 0:double) -> 9:double) -> 13:double, 
DoubleScalarModuloDoubleColumn(val -1.0, col 9:double)(children: 
DoubleColUnaryMinus(col 0:double) -> 9:double) -> 15:double, 
LongColUnaryMinus(col 1:bigint) -> 16:bigint, DoubleColUnaryMinus(col 
17:double)(children: DoubleColUnaryMinus(col 9:double)(children: 
DoubleColUnaryMinus(col 0:double) -> 9:double) -> 17:double) -> 9:double, 
LongScalarMultiplyLongColumn(val 762, col 18:bigint)(children: 
LongColUnaryMinus(col 1:bigint) 
-> 18:bigint) -> 19:bigint, LongColAddLongColumn(col 2:bigint, col 
20:bigint)(children: col 2:tinyint, LongScalarMultiplyLongColumn(val 762, col 
18:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 18:bigint) -> 
20:bigint) -> 18:bigint, DoubleColAddDoubleColumn(col 17:double, col 
21:double)(children: DoubleColUnaryMinus(col 0:double) -> 17:double, 
CastLongToDouble(col 4:int) -> 21:double) -> 22:double, 
LongColModuloLongColumn(col 20:bigint, col 1:bigint)(children: 
LongColUnaryMinus(col 1:bigint) -> 20:bigint) -> 23:bigint, 
LongScalarModuloLongColumn(val -3728, col 20:bigint)(children: 
LongColAddLongColumn(col 2:bigint, col 24:bigint)(children: col 2:tinyint, 
LongScalarMultiplyLongColumn(val 762, col 20:bigint)(children: 
LongColUnaryMinus(col 1:bigint) -> 20:bigint) -> 24:bigint) -> 20:bigint) -> 
24:bigint
                   Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 340 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -880,40 +829,23 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), max(VALUE._col1), 
stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), 
max(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFAvgFinal(col 
0:struct<count:bigint,sum:double,input:tinyint>) -> double, 
VectorUDAFMaxLong(col 1:bigint) -> bigint, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFVarFinal(col 
4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFMaxDouble(col 5:float) -> float
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: double), (_col0 + 6981.0) (type: 
double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), 
(((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) 
(type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: 
double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), 
(UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * 
-26.28) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 6, 8, 1, 7, 10, 2, 9, 3, 
4, 12, 14, 5, 11]
-                      selectExpressions: DoubleColAddDoubleScalar(col 
0:double, val 6981.0) -> 6:double, DoubleColAddDoubleColumn(col 7:double, col 
0:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 
7:double) -> 8:double, DoubleColDivideDoubleColumn(col 9:double, col 
0:double)(children: DoubleColAddDoubleColumn(col 7:double, col 
0:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 
7:double) -> 9:double) -> 7:double, DoubleColUnaryMinus(col 9:double)(children: 
DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 9:double) -> 10:double, 
DoubleColModuloDoubleColumn(col 0:double, col 11:double)(children: 
DoubleColUnaryMinus(col 9:double)(children: DoubleColAddDoubleScalar(col 
0:double, val 6981.0) -> 9:double) -> 11:double) -> 9:double, 
LongColUnaryMinus(col 1:bigint) -> 12:bigint, DoubleColDivideDoubleColumn(col 
11:double, col 2:double)(children: CastLongToDouble(col 13:bigint)(children: 
LongColUnaryMinus(col 1:bigint) -> 13:bigint) -> 11:double) -> 14:double, 
DoubleColMultiplyDoubleScalar(col 4:double, val -26.28) -> 
11:double
                   Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 328 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2207,24 +2139,14 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), 
var_pop(VALUE._col2), count(VALUE._col3)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 
1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFCountMerge(col 4:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:smallint
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3]
                 keys: KEY._col0 (type: smallint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -2232,19 +2154,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: smallint), (UDFToInteger(_col0) % 
-75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) 
(type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) 
% -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) 
(type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: 
int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 5, 1, 7, 2, 11, 12, 3, 8, 
4, 13]
-                      selectExpressions: LongColModuloLongScalar(col 0:int, 
val -75)(children: col 0:smallint) -> 5:int, 
DecimalScalarDivideDecimalColumn(val -1.389, col 6:decimal(5,0))(children: 
CastLongToDecimal(col 0:smallint) -> 6:decimal(5,0)) -> 7:decimal(10,9), 
DoubleColDivideDoubleColumn(col 9:double, col 10:double)(children: 
CastLongToDouble(col 8:int)(children: LongColModuloLongScalar(col 0:int, val 
-75)(children: col 0:smallint) -> 8:int) -> 9:double, CastLongToDouble(col 
2:bigint) -> 10:double) -> 11:double, LongColUnaryMinus(col 8:int)(children: 
LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 8:int) 
-> 12:int, LongColUnaryMinus(col 13:int)(children: LongColUnaryMinus(col 
8:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 
0:smallint) -> 8:int) -> 13:int) -> 8:int, LongColSubtractLongScalar(col 
4:bigint, val -89010) -> 13:bigint
                   Statistics: Num rows: 1251 Data size: 268968 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: smallint), _col1 (type: 
int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), 
_col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: 
int), _col9 (type: bigint), _col10 (type: bigint)
                     sort order: +++++++++++
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 1251 Data size: 268968 Basic stats: 
COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
@@ -2481,24 +2394,14 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFVarianceSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: var_samp(VALUE._col0), count(VALUE._col1), 
sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), 
sum(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 
1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 
3:double) -> double, VectorUDAFVarFinal(col 
4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFVarFinal(col 
5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFSumDouble(col 6:double) -> double
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:double
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6
@@ -2506,19 +2409,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: double), _col1 (type: double), 
(2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: 
bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 
* _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: 
double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + 
_col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), 
(-863.257 % (_col0 * 762.0)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 1, 7, 8, 2, 10, 11, 3, 4, 
12, 5, 9, 13, 6, 15]
-                      selectExpressions: DoubleScalarMultiplyDoubleColumn(val 
2563.58, col 1:double) -> 7:double, DoubleColUnaryMinus(col 1:double) -> 
8:double, DoubleColAddDoubleScalar(col 9:double, val -5638.15)(children: 
DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1:double) -> 9:double) -> 
10:double, DoubleColMultiplyDoubleColumn(col 9:double, col 12:double)(children: 
DoubleColUnaryMinus(col 1:double) -> 9:double, DoubleColAddDoubleScalar(col 
11:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 
2563.58, col 1:double) -> 11:double) -> 12:double) -> 11:double, 
DoubleColSubtractDoubleColumn(col 0:double, col 9:double)(children: 
DoubleColUnaryMinus(col 1:double) -> 9:double) -> 12:double, 
DoubleColAddDoubleColumn(col 0:double, col 1:double) -> 9:double, 
DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 13:double, 
DoubleScalarModuloDoubleColumn(val -863.257, col 14:double)(children: 
DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 14:double) -
 > 15:double
                   Statistics: Num rows: 1327 Data size: 285309 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: double)
                     sort order: +
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 1327 Data size: 285309 Basic stats: 
COMPLETE Column stats: NONE
                     value expressions: _col1 (type: double), _col2 (type: 
double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), 
_col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: 
double), _col10 (type: double), _col11 (type: double), _col12 (type: double), 
_col13 (type: double), _col14 (type: double)
         Reducer 3 
@@ -2799,24 +2693,14 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), 
count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), 
var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), 
avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), 
stddev_pop(VALUE._col11), sum(VALUE._col12)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFAvgFinal(col 
3:struct<count:bigint,sum:double,input:smallint>) -> double, 
VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFMinLong(col 5:tinyint) 
-> tinyint, VectorUDAFVarFinal(col 
6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFVarFinal(col 
7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFAvgFinal(col 8:struct<count:bigint,sum:double,input:int>) -> 
double, VectorUDAFVarFinal(col 
9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFAvgFinal(col 
10:struct<count:bigint,sum:double,input:float>) -> double, 
VectorUDAFMinDouble(col 11:double) -> double, VectorUDAFVarFinal(col 
12:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFVarFinal(col 13:struct<count:bigint,
 sum:double,variance:double>) -> double aggregation: stddev_pop, 
VectorUDAFSumLong(col 14:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:timestamp, col 1:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12]
                 keys: KEY._col0 (type: timestamp), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2824,19 +2708,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: timestamp), _col1 (type: string), 
_col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: 
double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) 
(type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - 
_col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * 
(- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) 
(type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) 
* UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), 
(UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 
/ _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + 
(((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) 
* (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 
10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) 
* (-
  _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * 
UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) 
(type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: 
double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % 
_col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: 
decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + 
(((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: 
double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + 
(((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: 
double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, 
_col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, 
_col36, _col37, _col38
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 1, 2, 15, 16, 3, 17, 18, 
4, 19, 22, 5, 21, 23, 6, 20, 26, 27, 7, 25, 8, 9, 29, 28, 10, 30, 32, 24, 11, 
12, 31, 34, 37, 13, 14, 38, 40, 4, 39]
-                      selectExpressions: DoubleColMultiplyDoubleScalar(col 
2:double, val 10.175) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 
16:double, DoubleColUnaryMinus(col 2:double) -> 17:double, 
DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 18:double, 
LongColUnaryMinus(col 4:bigint) -> 19:bigint, DoubleColMultiplyDoubleColumn(col 
20:double, col 21:double)(children: DoubleScalarSubtractDoubleColumn(val 
-26.28, col 2:double) -> 20:double, DoubleColUnaryMinus(col 2:double) -> 
21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 
20:double)(children: DoubleColMultiplyDoubleColumn(col 20:double, col 
21:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) 
-> 20:double, DoubleColUnaryMinus(col 2:double) -> 21:double) -> 23:double, 
CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
24:bigint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 
20:double)(children: DoubleColMultiplyDoubleScalar(
 col 2:double, val 10.175) -> 20:double) -> 23:double, 
DoubleColAddDoubleColumn(col 6:double, col 25:double)(children: 
DoubleColMultiplyDoubleColumn(col 26:double, col 20:double)(children: 
DoubleColMultiplyDoubleColumn(col 20:double, col 25:double)(children: 
DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 20:double, 
DoubleColUnaryMinus(col 2:double) -> 25:double) -> 26:double, 
CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
24:bigint) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 
25:double)(children: DoubleColUnaryMinus(col 2:double) -> 25:double) -> 
26:double, DoubleColDivideDoubleColumn(col 25:double, col 2:double)(children: 
CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
24:bigint) -> 25:double) -> 27:double, DoubleScalarDivideDoubleColumn(val 
10.175, col 3:double) -> 25:double, DoubleColSubtractDoubleColumn(col 
28:double, col 30:double)(children: DoubleColAddDoubleColumn(col 6:double, col 
 29:double)(children: DoubleColMultiplyDoubleColumn(col 30:double, col 
28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 
29:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) 
-> 28:double, DoubleColUnaryMinus(col 2:double) -> 29:double) -> 30:double, 
CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
24:bigint) -> 28:double) -> 29:double) -> 28:double, 
DoubleColMultiplyDoubleColumn(col 31:double, col 29:double)(children: 
DoubleColMultiplyDoubleColumn(col 29:double, col 30:double)(children: 
DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 29:double, 
DoubleColUnaryMinus(col 2:double) -> 30:double) -> 31:double, 
CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
24:bigint) -> 29:double) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 
30:double)(children: DoubleColUnaryMinus(col 28:double)(children: 
DoubleColMultiplyDoubleScalar(col 2:double, val 10.175) -> 28:double) -> 30:
 double) -> 28:double, DoubleColMultiplyDoubleScalar(col 31:double, val 
10.175)(children: DoubleColSubtractDoubleColumn(col 30:double, col 
32:double)(children: DoubleColAddDoubleColumn(col 6:double, col 
31:double)(children: DoubleColMultiplyDoubleColumn(col 32:double, col 
30:double)(children: DoubleColMultiplyDoubleColumn(col 30:double, col 
31:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) 
-> 30:double, DoubleColUnaryMinus(col 2:double) -> 31:double) -> 32:double, 
CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
24:bigint) -> 30:double) -> 31:double) -> 30:double, 
DoubleColMultiplyDoubleColumn(col 33:double, col 31:double)(children: 
DoubleColMultiplyDoubleColumn(col 31:double, col 32:double)(children: 
DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 31:double, 
DoubleColUnaryMinus(col 2:double) -> 32:double) -> 33:double, 
CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
24:bigint) -> 31:
 double) -> 32:double) -> 31:double) -> 30:double, 
DoubleScalarModuloDoubleColumn(val 10.175, col 31:double)(children: 
DoubleScalarDivideDoubleColumn(val 10.175, col 3:double) -> 31:double) -> 
32:double, LongColUnaryMinus(col 5:tinyint) -> 24:tinyint, 
DoubleColUnaryMinus(col 34:double)(children: DoubleColMultiplyDoubleColumn(col 
31:double, col 33:double)(children: DoubleScalarSubtractDoubleColumn(val 
-26.28, col 2:double) -> 31:double, DoubleColUnaryMinus(col 2:double) -> 
33:double) -> 34:double) -> 31:double, DoubleColModuloDoubleColumn(col 
33:double, col 10:double)(children: DoubleColUnaryMinus(col 2:double) -> 
33:double) -> 34:double, DecimalScalarDivideDecimalColumn(val -26.28, col 
36:decimal(3,0))(children: CastLongToDecimal(col 35:tinyint)(children: 
LongColUnaryMinus(col 5:tinyint) -> 35:tinyint) -> 36:decimal(3,0)) -> 
37:decimal(8,6), DoubleColDivideDoubleColumn(col 33:double, col 
7:double)(children: DoubleColAddDoubleColumn(col 6:double, col 
38:double)(children: DoubleColMult
 iplyDoubleColumn(col 39:double, col 33:double)(children: 
DoubleColMultiplyDoubleColumn(col 33:double, col 38:double)(children: 
DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 33:double, 
DoubleColUnaryMinus(col 2:double) -> 38:double) -> 39:double, 
CastLongToDouble(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
35:bigint) -> 33:double) -> 38:double) -> 33:double) -> 38:double, 
LongColUnaryMinus(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
35:bigint) -> 40:bigint, DoubleColModuloDoubleScalar(col 33:double, val 
-26.28)(children: DoubleColAddDoubleColumn(col 6:double, col 
39:double)(children: DoubleColMultiplyDoubleColumn(col 41:double, col 
33:double)(children: DoubleColMultiplyDoubleColumn(col 33:double, col 
39:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) 
-> 33:double, DoubleColUnaryMinus(col 2:double) -> 39:double) -> 41:double, 
CastLongToDouble(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 
35:bigi
 nt) -> 33:double) -> 39:double) -> 33:double) -> 39:double
                   Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: timestamp), _col1 (type: 
string), _col2 (type: double), _col3 (type: double), _col4 (type: double), 
_col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: 
bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), 
_col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 
(type: double), _col16 (type: double), _col17 (type: double), _col18 (type: 
double), _col19 (type: double), _col20 (type: double), _col21 (type: double), 
_col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 
(type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: 
double), _col29 (type: double), _col30 (type: double), _col31 (type: double), 
_col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), 
_col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 
(type: double)
                     sort order: +++++++++++++++++++++++++++++++++++++++
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 6144 Data size: 1320982 Basic stats: 
COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
@@ -3198,24 +3073,14 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFVarianceSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), sum(VALUE._col1), 
var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), 
var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), 
stddev_pop(VALUE._col8), avg(VALUE._col9)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMaxDouble(col 1:float) -> float, 
VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFAvgFinal(col 
4:struct<count:bigint,sum:double,input:double>) -> double, 
VectorUDAFMinLong(col 5:bigint) -> bigint, VectorUDAFVarFinal(col 
6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 
8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFAvgFinal(col 
10:struct<count:bigint,sum:double,input:int>) -> double
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:boolean
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
@@ -3223,19 +3088,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: boolean), _col1 (type: float), (- 
_col1) (type: float), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 
(type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), 
_col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- 
_col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS 
decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 
(type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: 
double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 
(type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), 
(CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) 
(type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) 
- (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 
(type: double), (- _col10) (type: double), (_col10 * UDFToDoubl
 e(_col7)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 1, 11, 12, 2, 14, 3, 15, 
17, 4, 19, 5, 6, 16, 20, 22, 7, 8, 23, 26, 9, 28, 10, 21, 30]
-                      selectExpressions: DoubleColUnaryMinus(col 1:float) -> 
11:float, DoubleScalarDivideDoubleColumn(val -26.28, col 1:double)(children: 
col 1:float) -> 12:double, DecimalColSubtractDecimalScalar(col 
13:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 
13:decimal(19,0)) -> 14:decimal(23,3), DoubleColModuloDoubleColumn(col 
3:double, col 1:double)(children: col 1:float) -> 15:double, 
DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16:float)(children: 
DoubleColUnaryMinus(col 1:float) -> 16:float) -> 17:float, 
DoubleColAddDoubleColumn(col 16:double, col 3:double)(children: 
CastDecimalToDouble(col 18:decimal(23,3))(children: 
DecimalColSubtractDecimalScalar(col 13:decimal(19,0), val 10.175)(children: 
CastLongToDecimal(col 2:bigint) -> 13:decimal(19,0)) -> 18:decimal(23,3)) -> 
16:double) -> 19:double, DoubleColUnaryMinus(col 20:float)(children: 
DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16:float)(children: 
DoubleColUnaryMinus(col 
 1:float) -> 16:float) -> 20:float) -> 16:float, 
DoubleScalarDivideDoubleColumn(val 79.553, col 6:double) -> 20:double, 
DoubleColModuloDoubleColumn(col 3:double, col 21:double)(children: 
DoubleScalarDivideDoubleColumn(val 79.553, col 6:double) -> 21:double) -> 
22:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 
13:decimal(19,0))(children: CastLongToDecimal(col 5:bigint) -> 
13:decimal(19,0)) -> 23:decimal(24,3), DecimalColSubtractDecimalColumn(col 
13:decimal(19,0), col 25:decimal(24,3))(children: CastLongToDecimal(col 
7:bigint) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, 
col 24:decimal(19,0))(children: CastLongToDecimal(col 5:bigint) -> 
24:decimal(19,0)) -> 25:decimal(24,3)) -> 26:decimal(25,3), 
FuncNegateDecimalToDecimal(col 27:decimal(25,3))(children: 
DecimalColSubtractDecimalColumn(col 13:decimal(19,0), col 
25:decimal(24,3))(children: CastLongToDecimal(col 7:bigint) -> 
13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24:decimal(
 19,0))(children: CastLongToDecimal(col 5:bigint) -> 24:decimal(19,0)) -> 
25:decimal(24,3)) -> 27:decimal(25,3)) -> 28:decimal(25,3), 
DoubleColUnaryMinus(col 10:double) -> 21:double, 
DoubleColMultiplyDoubleColumn(col 10:double, col 29:double)(children: 
CastLongToDouble(col 7:bigint) -> 29:double) -> 30:double
                   Statistics: Num rows: 5119 Data size: 1100602 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean)
                     sort order: +
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 5119 Data size: 1100602 Basic stats: 
COMPLETE Column stats: NONE
                     value expressions: _col1 (type: float), _col2 (type: 
float), _col3 (type: double), _col4 (type: bigint), _col5 (type: 
decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: 
float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), 
_col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 
(type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: 
decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 
(type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 
(type: double)
         Reducer 3 

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
index 55ebff2..a5575f5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
@@ -1088,40 +1088,23 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: true
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFVarianceEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), 
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), 
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
-                Group By Vectorization:
-                    aggregators: VectorUDAFAvgFinal(col 
0:struct<count:bigint,sum:double,input:timestamp>) -> double, 
VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: variance, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFVarFinal(col 
3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp, VectorUDAFVarFinal(col 
4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: std, 
VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: stddev, VectorUDAFVarFinal(col 
6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
                 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: round(_col0, 0) (type: double), _col1 BETWEEN 
8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 
8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 
9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) 
(type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), 
round(_col7, 3) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [8, 9, 10, 11, 12, 13, 14, 15]
-                      selectExpressions: RoundWithNumDigitsDoubleToDouble(col 
0, decimalPlaces 0) -> 8:double, VectorUDFAdaptor(_col1 BETWEEN 
8.97077295279421E19 AND 8.97077295279422E19) -> 9:boolean, 
VectorUDFAdaptor(_col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 
10:boolean, VectorUDFAdaptor(_col3 BETWEEN 9.20684592523616E19 AND 
9.20684592523617E19) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, 
decimalPlaces 3) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 5, 
decimalPlaces 3) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 6, 
decimalPlaces 3) -> 14:double, RoundWithNumDigitsDoubleToDouble(col 7, 
decimalPlaces 3) -> 15:double
                   Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 672 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

[1/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

Reply via email to