[2/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

mmccline Tue, 20 Feb 2018 22:34:33 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
index f4a5b55..591de4b 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
@@ -144,31 +144,14 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: aaaa
-                reduceColumnSortOrder: ++++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 9
-                    dataColumns: KEY._col0:double, KEY._col1:bigint, 
KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, 
VALUE._col1:struct<count:bigint,sum:double,variance:double>, 
VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:bigint, 
VALUE._col4:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), 
avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, 
VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: stddev_samp, VectorUDAFAvgFinal(col 
6:struct<count:bigint,sum:double,input:double>) -> double, 
VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 
8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:double, col 1:bigint, col 2:string, 
col 3:boolean
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4]
                 keys: KEY._col0 (type: double), KEY._col1 (type: bigint), 
KEY._col2 (type: string), KEY._col3 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
@@ -176,21 +159,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: bigint), _col3 (type: boolean), 
_col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), 
(- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), 
_col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- 
((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- 
(-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) 
(type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: 
double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * 
_col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col17, _col18, _col19
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [1, 3, 2, 0, 9, 10, 4, 11, 5, 
13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
-                      selectExpressions: DoubleScalarMultiplyDoubleColumn(val 
-6432.0, col 0:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 
10:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 11:bigint, 
DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 
13:double, DoubleColUnaryMinus(col 14:double)(children: 
DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 
14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 
15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 
16:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 
16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 
6:double, col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val 
-6432.0, col 
0:double) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 
14:double)(children: DoubleColUnaryMinus(col 19:double)(children: 
DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 
19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20:double, 
col 21:double)(children: DoubleColDivideDoubleScalar(col 14:double, val 
-6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) 
-> 14:double) -> 20:double, DoubleColUnaryMinus(col 14:double)(children: 
DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 
21:double) -> 14:double
                   Statistics: Num rows: 1877 Data size: 403561 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col3 (type: double), _col0 (type: 
bigint), _col2 (type: string)
                     sort order: +++
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        keyColumnNums: [0, 1, 2]
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 
17, 7, 18, 19, 14, 8]
                     Statistics: Num rows: 1877 Data size: 403561 Basic stats: 
COMPLETE Column stats: NONE
                     value expressions: _col1 (type: boolean), _col4 (type: 
double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), 
_col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 
(type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 
(type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: 
double), _col19 (type: double)
         Reducer 3


http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
index 6d5c27f..e6c1f12 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
@@ -146,31 +146,14 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: [double, decimal(11,4)]
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: aaaaa
-                reduceColumnSortOrder: +++++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 11
-                    dataColumns: KEY._col0:boolean, KEY._col1:tinyint, 
KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, 
VALUE._col1:double, 
VALUE._col2:struct<count:bigint,sum:double,variance:double>, 
VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:float, 
VALUE._col5:tinyint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), 
min(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, 
VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 
7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 
10:tinyint) -> tinyint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:boolean, col 1:tinyint, col 
2:timestamp, col 3:float, col 4:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), 
KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
@@ -178,21 +161,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: boolean), _col1 (type: tinyint), 
_col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) 
(type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), 
_col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: 
double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: 
double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + 
_col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: 
double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 
* UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), 
_col10 (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 
16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
-                      selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 
11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: 
LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, 
DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: 
CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, 
col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 
14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 
15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) 
-> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, 
DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: 
CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 
12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 
12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), 
DoubleColUnaryMinus(col 21:double)(children: 
DoubleColUnaryMinus(col 6:double) -> 21:double) -> 22:double, 
DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: 
DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) 
-> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 
24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, 
col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: 
LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: 
LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 
24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
                   Statistics: Num rows: 1365 Data size: 293479 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean), _col1 (type: 
tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), 
_col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 
(type: double), _col9 (type: double), _col10 (type: double), _col11 (type: 
float), _col12 (type: double), _col13 (type: double), _col14 (type: double), 
_col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), 
_col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
                     sort order: +++++++++++++++++++++
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        keyColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 
17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: []
                     Statistics: Num rows: 1365 Data size: 293479 Basic stats: 
COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
@@ -489,24 +461,14 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), sum(VALUE._col1), 
stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), 
min(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, 
VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 
7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 
10:tinyint) -> tinyint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:boolean, col 1:tinyint, col 
2:timestamp, col 3:float, col 4:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), 
KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
@@ -514,19 +476,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: boolean), _col1 (type: tinyint), 
_col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) 
(type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), 
_col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: 
double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: 
double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + 
_col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: 
double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 
* UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), 
_col10 (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 
16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
-                      selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 
11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: 
LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, 
DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: 
CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, 
col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 
14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 
15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) 
-> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, 
DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: 
CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 
12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 
12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), 
DoubleColUnaryMinus(col 21:double)(children: 
DoubleColUnaryMinus(col 6:double) -> 21:double) -> 22:double, 
DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: 
DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) 
-> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 
24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, 
col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: 
LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: 
LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 
24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
                   Statistics: Num rows: 1365 Data size: 293479 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean), _col1 (type: 
tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), 
_col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 
(type: double), _col9 (type: double), _col10 (type: double), _col11 (type: 
float), _col12 (type: double), _col13 (type: double), _col14 (type: double), 
_col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), 
_col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
                     sort order: +++++++++++++++++++++
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     Statistics: Num rows: 1365 Data size: 293479 Basic stats: 
COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
index 3016203..6fa51e5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
@@ -147,31 +147,14 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: [double, double]
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: aaaaa
-                reduceColumnSortOrder: +++++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 11
-                    dataColumns: KEY._col0:string, KEY._col1:float, 
KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, 
VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:float, 
VALUE._col2:struct<count:bigint,sum:double,variance:double>, 
VALUE._col3:bigint, 
VALUE._col4:struct<count:bigint,sum:double,variance:double>, 
VALUE._col5:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), 
stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), 
var_samp(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 
5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFMaxDouble(col 6:float) -> float, VectorUDAFVarFinal(col 
7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFCountMerge(col 8:bigint) -> bigint, 
VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: var_pop, VectorUDAFVarFinal(col 
10:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_samp
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:string, col 1:float, col 2:double, 
col 3:timestamp, col 4:boolean
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 keys: KEY._col0 (type: string), KEY._col1 (type: float), 
KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
@@ -179,21 +162,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col3 (type: timestamp), _col1 (type: float), 
_col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + 
_col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: 
double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: 
float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: 
double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 
10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - 
_col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), 
_col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [3, 1, 0, 4, 2, 11, 13, 5, 
12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
-                      selectExpressions: DoubleScalarAddDoubleColumn(val 
-26.28, col 2:double) -> 11:double, DoubleColUnaryMinus(col 
12:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 
12:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val 
-26.280000686645508) -> 12:float, DoubleColUnaryMinus(col 1:float) -> 14:float, 
DoubleColUnaryMinus(col 6:float) -> 15:float, DoubleColDivideDoubleScalar(col 
17:double, val 10.175)(children: DoubleColUnaryMinus(col 16:double)(children: 
DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 16:double) -> 
17:double) -> 16:double, DoubleColUnaryMinus(col 17:double)(children: 
DoubleColDivideDoubleScalar(col 18:double, val 10.175)(children: 
DoubleColUnaryMinus(col 17:double)(children: DoubleScalarAddDoubleColumn(val 
-26.28, col 2:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double, 
DoubleScalarModuloDoubleColumn(val -1.389, col 5:double) -> 17:double, 
DoubleColSubtractDoubleColumn(col 1:double, 
col 2:double)(children: col 1:float) -> 19:double, 
DoubleColModuloDoubleScalar(col 9:double, val 10.175) -> 20:double, 
DoubleColUnaryMinus(col 21:double)(children: DoubleColSubtractDoubleColumn(col 
1:double, col 2:double)(children: col 1:float) -> 21:double) -> 22:double
                   Statistics: Num rows: 303 Data size: 65146 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col2 (type: string), _col1 (type: 
float), _col4 (type: double), _col0 (type: timestamp)
                     sort order: ++++
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        keyColumnNums: [0, 1, 2, 3]
-                        native: true
-                        nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: [4, 11, 13, 5, 12, 6, 14, 15, 16, 7, 
8, 18, 17, 19, 9, 20, 10, 22]
                     Statistics: Num rows: 303 Data size: 65146 Basic stats: 
COMPLETE Column stats: NONE
                     value expressions: _col3 (type: boolean), _col5 (type: 
double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 
(type: float), _col10 (type: float), _col11 (type: float), _col12 (type: 
double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), 
_col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 
(type: double), _col20 (type: double), _col21 (type: double)
         Reducer 3 

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index b270aea..24cae48 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -119,31 +119,14 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: aaa
-                reduceColumnSortOrder: +++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 6
-                    dataColumns: KEY._col0:double, KEY._col1:string, 
KEY._col2:timestamp, VALUE._col0:bigint, 
VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), 
min(VALUE._col2)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, 
VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:double, col 1:string, col 2:timestamp
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2]
                 keys: KEY._col0 (type: double), KEY._col1 (type: string), 
KEY._col2 (type: timestamp)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -151,17 +134,9 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: double), 
_col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 
9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- 
_col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 
(type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS 
decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 
5, 9, 12, 4]
-                      selectExpressions: DoubleColSubtractDoubleScalar(col 
0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 
7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 
9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 
7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: 
CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, 
DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, 
DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: 
CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
                   Statistics: Num rows: 2048 Data size: 440327 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 2048 Data size: 440327 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
index 09267d7..f11854c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
@@ -121,47 +121,23 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: [double]
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: 
-                reduceColumnSortOrder: 
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 6
-                    dataColumns: 
VALUE._col0:struct<count:bigint,sum:double,input:smallint>, VALUE._col1:double, 
VALUE._col2:struct<count:bigint,sum:double,variance:double>, 
VALUE._col3:bigint, VALUE._col4:tinyint, 
VALUE._col5:struct<count:bigint,sum:double,input:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFVarianceEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), sum(VALUE._col1), 
var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFAvgFinal(col 
0:struct<count:bigint,sum:double,input:smallint>) -> double, 
VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
var_pop, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMinLong(col 
4:tinyint) -> tinyint, VectorUDAFAvgFinal(col 
5:struct<count:bigint,sum:double,input:double>) -> double
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: double), (_col0 % -563.0) (type: 
double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: 
double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: 
bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), 
_col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 
(type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 6, 7, 1, 2, 8, 9, 3, 11, 
10, 4, 14, 5, 12]
-                      selectExpressions: DoubleColModuloDoubleScalar(col 
0:double, val -563.0) -> 6:double, DoubleColAddDoubleScalar(col 0:double, val 
762.0) -> 7:double, DoubleColUnaryMinus(col 2:double) -> 8:double, 
DoubleColSubtractDoubleColumn(col 1:double, col 0:double) -> 9:double, 
DoubleColUnaryMinus(col 10:double)(children: DoubleColSubtractDoubleColumn(col 
1:double, col 0:double) -> 10:double) -> 11:double, 
DoubleColSubtractDoubleScalar(col 2:double, val 762.0) -> 10:double, 
DoubleColAddDoubleColumn(col 12:double, col 13:double)(children: 
DoubleColUnaryMinus(col 2:double) -> 12:double, CastLongToDouble(col 4:tinyint) 
-> 13:double) -> 14:double, DoubleColSubtractDoubleColumn(col 15:double, col 
1:double)(children: DoubleColAddDoubleColumn(col 12:double, col 
13:double)(children: DoubleColUnaryMinus(col 2:double) -> 12:double, 
CastLongToDouble(col 4:tinyint) -> 13:double) -> 15:double) -> 12:double
                   Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 256 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
index 444b534..253e12c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
@@ -126,47 +126,23 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: [double, decimal(22,3), 
decimal(8,3)]
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: 
-                reduceColumnSortOrder: 
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 6
-                    dataColumns: 
VALUE._col0:struct<count:bigint,sum:double,variance:double>, 
VALUE._col1:struct<count:bigint,sum:double,variance:double>, 
VALUE._col2:struct<count:bigint,sum:double,variance:double>, 
VALUE._col3:double, VALUE._col4:struct<count:bigint,sum:double,input:int>, 
VALUE._col5:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: stddev_samp(VALUE._col0), 
stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), 
avg(VALUE._col4), stddev_pop(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 
0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFVarFinal(col 
1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_pop, VectorUDAFVarFinal(col 
2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: 
stddev_samp, VectorUDAFSumDouble(col 3:double) -> double, 
VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:int>) -> double, 
VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: stddev_pop
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: double), (_col0 - 10.175) (type: 
double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- 
_col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 
10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 
(type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: 
double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - 
_col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 6, 1, 8, 7, 9, 10, 2, 11, 
3, 14, 13, 4, 12, 5, 15]
-                      selectExpressions: DoubleColSubtractDoubleScalar(col 
0:double, val 10.175) -> 6:double, DoubleColMultiplyDoubleColumn(col 0:double, 
col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) 
-> 7:double) -> 8:double, DoubleColUnaryMinus(col 1:double) -> 7:double, 
DoubleColModuloDoubleScalar(col 0:double, val 79.553) -> 9:double, 
DoubleColUnaryMinus(col 11:double)(children: DoubleColMultiplyDoubleColumn(col 
0:double, col 10:double)(children: DoubleColSubtractDoubleScalar(col 0:double, 
val 10.175) -> 10:double) -> 11:double) -> 10:double, DoubleColUnaryMinus(col 
0:double) -> 11:double, DoubleColDivideDoubleColumn(col 12:double, col 
13:double)(children: DoubleColUnaryMinus(col 13:double)(children: 
DoubleColMultiplyDoubleColumn(col 0:double, col 12:double)(children: 
DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 
13:double) -> 12:double, DoubleColSubtractDoubleScalar(col 0:double, val 
10.175) -> 13:double) -> 14:double,
  DoubleColUnaryMinus(col 12:double)(children: 
DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 
13:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 0:double) -> 
12:double, DoubleColDivideDoubleColumn(col 4:double, col 2:double) -> 15:double
                   Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 404 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
index 664ff5e..436fdd9 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
@@ -121,47 +121,23 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: 
-                reduceColumnSortOrder: 
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 5
-                    dataColumns: VALUE._col0:bigint, 
VALUE._col1:struct<count:bigint,sum:double,variance:double>, 
VALUE._col2:struct<count:bigint,sum:double,input:double>, 
VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:tinyint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), 
avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, 
VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: stddev_pop, VectorUDAFAvgFinal(col 
2:struct<count:bigint,sum:double,input:double>) -> double, 
VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: var_pop, VectorUDAFMinLong(col 4:tinyint) -> tinyint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: bigint), (_col0 * -563) (type: 
bigint), (-3728 + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: 
double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), 
(UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: 
double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), 
((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 
(type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % 
_col0)) / _col2))) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 5, 6, 1, 7, 2, 9, 12, 3, 
11, 14, 4, 4, 16]
-                      selectExpressions: LongColMultiplyLongScalar(col 
0:bigint, val -563) -> 5:bigint, LongScalarAddLongColumn(val -3728, col 
0:bigint) -> 6:bigint, DoubleColUnaryMinus(col 1:double) -> 7:double, 
LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: 
LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 9:bigint, 
DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: 
CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, 
col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 
8:bigint) -> 10:bigint) -> 11:double) -> 12:double, DoubleColUnaryMinus(col 
13:double)(children: DoubleColDivideDoubleColumn(col 11:double, col 
2:double)(children: CastLongToDouble(col 10:bigint)(children: 
LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: 
LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 
11:double) -> 13:double) -> 11:double, LongColSubtractLongColumn(col 8:big
 int, col 10:bigint)(children: LongScalarAddLongColumn(val -3728, col 0:bigint) 
-> 8:bigint, LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 
14:bigint, DoubleColMultiplyDoubleColumn(col 13:double, col 
15:double)(children: CastLongToDouble(col 4:tinyint) -> 13:double, 
DoubleColUnaryMinus(col 16:double)(children: DoubleColDivideDoubleColumn(col 
15:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: 
LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: 
LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 
15:double) -> 16:double) -> 15:double) -> 16:double
                   Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 252 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index b270aea..24cae48 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -119,31 +119,14 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                reduceColumnNullOrder: aaa
-                reduceColumnSortOrder: +++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 6
-                    dataColumns: KEY._col0:double, KEY._col1:string, 
KEY._col2:timestamp, VALUE._col0:bigint, 
VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: 
FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), 
min(VALUE._col2)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, 
VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> 
double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:double, col 1:string, col 2:timestamp
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2]
                 keys: KEY._col0 (type: double), KEY._col1 (type: string), 
KEY._col2 (type: timestamp)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -151,17 +134,9 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: double), 
_col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 
9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- 
_col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 
(type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS 
decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 
5, 9, 12, 4]
-                      selectExpressions: DoubleColSubtractDoubleScalar(col 
0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 
7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 
9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 
7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: 
CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, 
DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, 
DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: 
CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
                   Statistics: Num rows: 2048 Data size: 440327 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 2048 Data size: 440327 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
 
b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
index 18295e1..01eb4b4 100644
--- 
a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
+++ 
b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
@@ -197,13 +197,11 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), min(VALUE._col1), 
count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -514,13 +512,11 @@ STAGE PLANS:
                 enabledConditionsNotMet: Row deserialization of vectorized 
input format not supported IS false, 
hive.vectorized.use.vectorized.input.format IS true AND 
hive.vectorized.input.format.excludes NOT CONTAINS 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
                 inputFileFormats: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), min(VALUE._col1), 
count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -843,13 +839,11 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), min(VALUE._col1), 
count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -1208,13 +1202,11 @@ STAGE PLANS:
                 enabledConditionsNotMet: 
hive.vectorized.use.vectorized.input.format IS true AND 
hive.vectorized.input.format.excludes NOT CONTAINS 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat IS false
                 inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
         Reducer 2 
-            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: GROUPBY operator: Vector aggregation : 
"stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL 
not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), min(VALUE._col1), 
count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)

[2/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)

Reply via email to