HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8975924e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8975924e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8975924e

Branch: refs/heads/master
Commit: 8975924ec070380069d71d325a3358fe9932befb
Parents: 3df6bc2
Author: Matt McCline <mmccl...@hortonworks.com>
Authored: Tue Feb 20 22:33:59 2018 -0800
Committer: Matt McCline <mmccl...@hortonworks.com>
Committed: Tue Feb 20 22:33:59 2018 -0800

----------------------------------------------------------------------
 .../UDAFTemplates/VectorUDAFVarMerge.txt        |   5 +
 .../llap/parquet_types_vectorization.q.out      |   2 +-
 .../llap/vector_decimal_aggregate.q.out         |  66 +------
 .../llap/vector_decimal_udf.q.out               | 100 ++--------
 .../llap/vector_reuse_scratchcols.q.out         |  58 +-----
 .../llap/vector_string_decimal.q.out            | 137 ++++++++++++++
 .../llap/vector_udf_string_to_boolean.q.out     | 189 +++++++++++++++++++
 .../clientpositive/llap/vectorization_0.q.out   |  50 ++---
 .../clientpositive/llap/vectorization_1.q.out   |  29 +--
 .../clientpositive/llap/vectorization_12.q.out  |  33 +---
 .../clientpositive/llap/vectorization_13.q.out  |  57 +----
 .../clientpositive/llap/vectorization_14.q.out  |  33 +---
 .../clientpositive/llap/vectorization_16.q.out  |  30 +--
 .../clientpositive/llap/vectorization_2.q.out   |  29 +--
 .../clientpositive/llap/vectorization_3.q.out   |  29 +--
 .../clientpositive/llap/vectorization_4.q.out   |  29 +--
 .../clientpositive/llap/vectorization_9.q.out   |  30 +--
 .../vectorization_input_format_excludes.q.out   |  28 ++-
 .../llap/vectorization_part_project.q.out       |  12 +-
 .../llap/vectorization_short_regress.q.out      | 186 +++--------------
 .../llap/vectorized_mapjoin3.q.out              | 100 ++++------
 .../llap/vectorized_parquet.q.out               |   7 +-
 .../llap/vectorized_parquet_types.q.out         |  19 +-
 .../llap/vectorized_timestamp.q.out             |  24 +--
 .../llap/vectorized_timestamp_funcs.q.out       |  22 +--
 .../spark/parquet_vectorization_0.q.out         |  46 +----
 .../spark/parquet_vectorization_1.q.out         |  28 +--
 .../spark/parquet_vectorization_12.q.out        |  32 +---
 .../spark/parquet_vectorization_13.q.out        |  55 +----
 .../spark/parquet_vectorization_14.q.out        |  32 +---
 .../spark/parquet_vectorization_16.q.out        |  29 +--
 .../spark/parquet_vectorization_2.q.out         |  28 +--
 .../spark/parquet_vectorization_3.q.out         |  28 +--
 .../spark/parquet_vectorization_4.q.out         |  28 +--
 .../spark/parquet_vectorization_9.q.out         |  29 +--
 .../spark/vector_decimal_aggregate.q.out        |  64 +------
 .../clientpositive/spark/vectorization_0.q.out  |  46 +----
 .../clientpositive/spark/vectorization_1.q.out  |  28 +--
 .../clientpositive/spark/vectorization_12.q.out |  32 +---
 .../clientpositive/spark/vectorization_13.q.out |  55 +----
 .../clientpositive/spark/vectorization_14.q.out |  32 +---
 .../clientpositive/spark/vectorization_16.q.out |  29 +--
 .../clientpositive/spark/vectorization_2.q.out  |  28 +--
 .../clientpositive/spark/vectorization_3.q.out  |  28 +--
 .../clientpositive/spark/vectorization_4.q.out  |  28 +--
 .../clientpositive/spark/vectorization_9.q.out  |  29 +--
 .../vectorization_input_format_excludes.q.out   |  24 +--
 .../spark/vectorization_short_regress.q.out     | 178 ++--------------
 .../spark/vectorized_timestamp_funcs.q.out      |  21 +--
 49 files changed, 575 insertions(+), 1686 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
index 9b1c1cd..ccc5a22 100644
--- a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
+++ b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
@@ -487,6 +487,9 @@ public class <ClassName> extends VectorAggregateExpression {
      * Mode FINAL.
 #ENDIF FINAL
      */
+
+/*
+    There seems to be a Wrong Results bug in VectorUDAFVarFinal -- disabling vectorization for now...
     return
         GenericUDAFVariance.isVarianceFamilyName(name) &&
         inputColVectorType == ColumnVector.Type.STRUCT &&
@@ -498,6 +501,8 @@ public class <ClassName> extends VectorAggregateExpression {
         outputColVectorType == ColumnVector.Type.DOUBLE &&
         mode == Mode.FINAL;
 #ENDIF FINAL
+*/
+    return false;
   }
 
   @Override
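For context: the check commented out above is the capability test that let the variance family run as VectorUDAFVarFinal in FINAL mode. In that mode the aggregator merges partial {count, sum, variance} structs and then finalizes; the merge follows the standard pairwise identity (Chan et al.). A minimal Java sketch of that merge, assuming a partial layout like the struct fields shown in the plans below -- illustrative only, not Hive's VectorUDAFVarFinal source:

    // Illustrative sketch (not Hive's VectorUDAFVarFinal source): the pairwise
    // merge a FINAL-mode variance aggregator performs over partial
    // {count, sum, variance} structs, where "variance" accumulates the sum of
    // squared deviations from the mean (count * var_pop).
    final class VarPartial {
      long count;       // rows aggregated so far
      double sum;       // sum of the input values
      double variance;  // sum of squared deviations from the mean

      void merge(VarPartial other) {
        if (other.count == 0) {
          return;                        // nothing to fold in
        }
        if (count == 0) {                // adopt the other partial wholesale
          count = other.count;
          sum = other.sum;
          variance = other.variance;
          return;
        }
        double t = (other.count / (double) count) * sum - other.sum;
        variance += other.variance
            + ((count / (double) other.count) / (count + other.count)) * t * t;
        count += other.count;
        sum += other.sum;
      }
    }

A sign or ordering slip anywhere in this merge/finalize path silently skews every stddev_*/var_* result downstream, which is why the conservative fix is to stop claiming FINAL-mode support (return false) and let the row-mode GenericUDAF evaluators run, as the golden-file updates below show.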
aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 3:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 4:decimal(30,10)) -> decimal(30,10), VectorUDAFAvgDecimalFinal(col 5:struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>) -> decimal(24,14), VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 10:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 11:decimal(33,14)) -> decimal(33,14), VectorUDAFAvgDecimalFinal(col 12:struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>) -> decimal(27,18), VectorUDAFVarFinal(col 13:struct<count:bigint,sum:do uble,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 14:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 15:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:int - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1) predicate: (_col15 > 1) (type: boolean) Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat @@ -716,55 +689,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 16 - dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>, VALUE._col5:struct<count:bigint,sum:double,variance:double>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:bigint, VALUE._col8:decimal(16,0), VALUE._col9:decimal(16,0), VALUE._col10:decimal(26,0), VALUE._col11:struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>, VALUE._col12:struct<count:bigint,sum:double,variance:double>, VALUE._col13:struct<count:bigint,sum:double,variance:double>, VALUE._col14:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] + notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFAvgDecimalFinal(col 5:struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>) -> decimal(15,9), VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 10:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 11:decimal(26,0)) -> decimal(26,0), VectorUDAFAvgDecimalFinal(col 12:struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>) -> decimal(20,4), VectorUDAFVarFinal(col 13:struct<count:bigint,sum:double,variance:doub le>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 14:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 15:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:int - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE Filter Operator - Filter Vectorization: - className: 
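The notVectorizedReason lines above are the visible effect of the template change: with FINAL mode no longer claimed, Reducer 2 falls back to row mode and the GenericUDAF evaluators finish the job. After partials are merged, finalization differs only by aggregation name; a sketch of those textbook formulas (illustrative, not Hive's source; count <= 1 edge cases simplified):

    // Illustrative finalization over a merged partial, keyed by aggregation
    // name; sumSqDev is the accumulated sum of squared deviations ("variance"
    // in the partial structs shown above). Not Hive's evaluator source.
    static double finalizeVariance(String name, long count, double sumSqDev) {
      switch (name) {
        case "variance":
        case "var_pop":
          return sumSqDev / count;
        case "var_samp":
          return sumSqDev / (count - 1);
        case "std":
        case "stddev":
        case "stddev_pop":
          return Math.sqrt(sumSqDev / count);
        case "stddev_samp":
          return Math.sqrt(sumSqDev / (count - 1));
        default:
          throw new IllegalArgumentException("not a variance-family name: " + name);
      }
    }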
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
index a306a17..f96c769 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
@@ -3280,40 +3280,21 @@ STAGE PLANS:
                 partitionColumnCount: 0
                 scratchColumnTypeNames: []
        Reducer 2
-            Execution mode: vectorized, llap
+            Execution mode: llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 3
-                    dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: stddev(VALUE._col0), variance(VALUE._col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: variance
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:int
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1]
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  File Sink Vectorization:
-                      className: VectorFileSinkOperator
-                      native: false
                  Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3434,40 +3415,21 @@ STAGE PLANS:
                 partitionColumnCount: 0
                 scratchColumnTypeNames: []
        Reducer 2
-            Execution mode: vectorized, llap
+            Execution mode: llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 3
-                    dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:int
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1]
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  File Sink Vectorization:
-                      className: VectorFileSinkOperator
-                      native: false
                  Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -7263,40 +7225,21 @@ STAGE PLANS:
                 partitionColumnCount: 0
                 scratchColumnTypeNames: []
        Reducer 2
-            Execution mode: vectorized, llap
+            Execution mode: llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 3
-                    dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: stddev(VALUE._col0), variance(VALUE._col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: variance
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:int
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1]
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
-                  File Sink Vectorization:
-                      className: VectorFileSinkOperator
-                      native: false
                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -7418,40 +7361,21 @@ STAGE PLANS:
                 partitionColumnCount: 0
                 scratchColumnTypeNames: []
        Reducer 2
-            Execution mode: vectorized, llap
+            Execution mode: llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 3
-                    dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:int
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1]
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
-                  File Sink Vectorization:
-                      className: VectorFileSinkOperator
-                      native: false
                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
index 6528b6f..b9c1ba3 100644
--- a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
@@ -151,47 +151,24 @@ STAGE PLANS:
                 partitionColumnCount: 0
                 scratchColumnTypeNames: [double, decimal(22,3), decimal(13,3)]
        Reducer 2
-            Execution mode: vectorized, llap
+            Execution mode: llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder:
-                reduceColumnSortOrder:
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 9
-                    dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:int>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,input:float>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:tinyint, VALUE._col8:bigint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
-                Group By Vectorization:
-                    aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
-                      selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 16:double, col 13:double)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 13:double, DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 19:double) -> 20:double, DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 21:double, DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 22:double, DoubleColDivideDoubleColumn(col 23:double, col 25:double)(children: CastLongToDouble(col 7:tinyint) -> 23:double, DoubleColDivideDoubleScalar(col 24:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 23:double) -> 25:double
                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -357,47 +334,24 @@ STAGE PLANS:
                 partitionColumnCount: 0
                 scratchColumnTypeNames: [double, double, double, decimal(22,3), decimal(13,3)]
        Reducer 2
-            Execution mode: vectorized, llap
+            Execution mode: llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder:
-                reduceColumnSortOrder:
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 9
-                    dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:int>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,input:float>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:tinyint, VALUE._col8:bigint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
-                Group By Vectorization:
-                    aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 9, 11, 14, 19, 1, 20, 2, 29, 3, 30, 34, 39, 4, 5, 40, 42, 44, 6, 47, 48, 7, 8, 52, 54]
-                      selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 17:double, col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double) -> 17:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 0:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 25:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: DoubleColUnaryMinus(col 22:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 21:double) -> 22:double) -> 23:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 24:double) -> 25:double, DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 26:double) -> 27:double) -> 28:double) -> 29:double, DoubleColUnaryMinus(col 2:double) -> 30:double, DoubleColSubtractDoubleColumn(col 2:double, col 33:double)(children: DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 31:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DoubleColMultiplyDoubleColumn(col 38:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 37:double)(children: DoubleColUnaryMinus(col 36:double)(children: DoubleColUnaryMinus(col 35:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 35:double) -> 36:double) -> 37:double) -> 38:double) -> 39:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 40:double, DoubleColUnaryMinus(col 41:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 41:double) -> 42:double, DoubleColDivideDoubleScalar(col 43:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 43:double) -> 44:double, DoubleColUnaryMinus(col 46:double)(children: DoubleColDivideDoubleScalar(col 45:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 45:double) -> 46:double) -> 47:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 48:double, DoubleColDivideDoubleColumn(col 49:double, col 51:double)(children: CastLongToDouble(col 7:tinyint) -> 49:double, DoubleColDivideDoubleScalar(col 50:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 50:double) -> 51:double) -> 52:double, DoubleColUnaryMinus(col 53:double)(children: DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 53:double) -> 54:double
                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out b/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
new file mode 100644
index 0000000..d792c46
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
@@ -0,0 +1,137 @@
+PREHOOK: query: drop table orc_decimal
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table orc_decimal
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table orc_decimal (id decimal(18,0)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: create table orc_decimal (id decimal(18,0)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_decimal
+PREHOOK: query: create table staging (id decimal(18,0))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: create table staging (id decimal(18,0))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: insert into staging values (34324.0), (100000000.0), (200000000.0), (300000000.0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@staging
+POSTHOOK: query: insert into staging values (34324.0), (100000000.0), (200000000.0), (300000000.0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@staging
+POSTHOOK: Lineage: staging.id SCRIPT []
+PREHOOK: query: insert overwrite table orc_decimal select id from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: insert overwrite table orc_decimal select id from staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_decimal
+POSTHOOK: Lineage: orc_decimal.id SIMPLE [(staging)staging.FieldSchema(name:id, type:decimal(18,0), comment:null), ]
+PREHOOK: query: explain vectorization expression
+select * from orc_decimal where id in ('100000000', '200000000')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select * from orc_decimal where id in ('100000000', '200000000')
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: orc_decimal
+                  Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterDoubleColumnInList(col 2:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0:decimal(18,0)) -> 2:double)
+                    predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean)
+                    Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: id (type: decimal(18,0))
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from orc_decimal where id in ('100000000', '200000000')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_decimal where id in ('100000000', '200000000')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_decimal
+#### A masked pattern was here ####
+100000000
+200000000
+PREHOOK: query: drop table orc_decimal
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_decimal
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: drop table orc_decimal
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_decimal
+POSTHOOK: Output: default@orc_decimal
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@staging
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@staging
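Note how this new golden file resolves a string IN-list against a decimal column: both sides are coerced to double (CastDecimalToDouble feeding FilterDoubleColumnInList). A rough Java sketch of that comparison strategy -- class and method names here are invented for illustration, not Hive's operator source:

    import java.math.BigDecimal;

    // Invented names, illustrating the plan above: the string literals are
    // parsed to doubles once, and each decimal row value is cast to double
    // before the IN-list membership test.
    final class DoubleInListFilter {
      private final double[] inList;

      DoubleInListFilter(String... literals) {
        inList = new double[literals.length];
        for (int i = 0; i < literals.length; i++) {
          inList[i] = Double.parseDouble(literals[i]);  // '100000000' -> 1.0E8
        }
      }

      boolean matches(BigDecimal id) {
        double v = id.doubleValue();                    // CastDecimalToDouble
        for (double candidate : inList) {
          if (v == candidate) {
            return true;
          }
        }
        return false;
      }
    }

The comparison is exact only while values survive the round trip to double; decimal(18,0) values above 2^53 could alias, which is worth keeping in mind when reading this plan.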
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out
new file mode 100644
index 0000000..647fcb7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out
@@ -0,0 +1,189 @@
+PREHOOK: query: create table t (s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values ('false')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('false')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('FALSE')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('FALSE')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('FaLsE')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('FaLsE')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('true')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('true')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('TRUE')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('TRUE')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('TrUe')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('TrUe')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('Other')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('Other')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('Off')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('Off')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('No')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('No')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('0')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: explain select s,cast(s as boolean) from t order by s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s,cast(s as boolean) from t order by s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t
+                  Statistics: Num rows: 12 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: s (type: string), UDFToBoolean(s) (type: boolean)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: boolean)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select s,cast(s as boolean) from t order by s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select s,cast(s as boolean) from t order by s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+	false
+0	false
+1	true
+FALSE	false
+FaLsE	false
+No	false
+Off	false
+Other	true
+TRUE	true
+TrUe	true
+false	false
+true	true
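The result table above pins down the cast semantics this new test covers: the empty string, '0', and any capitalization of 'false', 'no', and 'off' come back false, while every other string ('Other', '1') comes back true. A predicate consistent with that table -- an illustrative sketch, not the actual UDFToBoolean source:

    // Consistent with the golden-file results above; illustrative only.
    static boolean stringToBoolean(String s) {
      if (s == null || s.isEmpty()) {
        return false;
      }
      switch (s.toLowerCase()) {
        case "false":
        case "no":
        case "off":
        case "0":
          return false;
        default:
          return true;  // e.g. 'Other' and '1' both evaluate to true
      }
    }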
stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -1448,13 +1446,12 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -1656,47 +1653,24 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [decimal(13,3), double] Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: - reduceColumnSortOrder: - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 6 - dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:bigint>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint - partitionColumnCount: 0 - scratchColumnTypeNames: [] + notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) - Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:bigint>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFMinLong(col 5:tinyint) -> tinyint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - native: false - vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19] - selectExpressions: DoubleColUnaryMinus(col 0:double) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10:double, col 8:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2:double, col 1:double) -> 14:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 16:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0:double) -> 16:double) -> 18:double, LongColUnaryMinus(col 5:tinyint) -> 19:tinyint Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out index 278bd0c..d2de8e7 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out @@ -119,47 +119,24 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [double] Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: - reduceColumnSortOrder: - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 6 - dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct<count:bigint,sum:double,variance:double>, 

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
index 278bd0c..d2de8e7 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
@@ -119,47 +119,24 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: [double]
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: 
-                reduceColumnSortOrder: 
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 6
-                    dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:bigint
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
-                Group By Vectorization:
-                    aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFMaxLong(col 2:tinyint) -> tinyint, VectorUDAFMaxLong(col 3:int) -> int, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 5:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14]
-                      selectExpressions: DoubleColDivideDoubleScalar(col 0:double, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 3:int) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 10:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3:int) -> 14:int
                   Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
                     Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
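The vectorization_12.q.out diff that follows covers the grouped variant: the reduce-side GROUP BY merges partial aggregates per key (MERGE_PARTIAL rather than GLOBAL vector processing mode), but the same evaluator check now disables vectorization. The affected query shape is roughly this (again illustrative, not the verbatim test query):

    -- Grouped variance aggregates: the per-key merge of the partial
    -- struct<count,sum,variance> buffers also falls back to row mode.
    SELECT cbigint, cboolean1, cstring1, cdouble,
           COUNT(cbigint), STDDEV_SAMP(cdouble), AVG(cdouble),
           SUM(cbigint), STDDEV_POP(cdouble)
    FROM alltypesorc
    GROUP BY cdouble, cbigint, cstring1, cboolean1;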

http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
index 1285b25..77f8e3b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
@@ -146,31 +146,15 @@ STAGE PLANS:
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reducer 2 
-            Execution mode: vectorized, llap
+            Execution mode: llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: aaaa
-                reduceColumnSortOrder: ++++
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 9
-                    dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:bigint, VALUE._col4:struct<count:bigint,sum:double,variance:double>
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
+                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFAvgFinal(col 6:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4]
                 keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -178,21 +162,10 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
-                      selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 10:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 11:bigint, DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6:double, col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 21:double) -> 14:double
                   Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
                     sort order: +++
-                    Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
-                        keyColumnNums: [0, 1, 2]
-                        native: true
-                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        valueColumnNums: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
                     Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
         Reducer 3