http://git-wip-us.apache.org/repos/asf/hive/blob/029e48b7/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out new file mode 100644 index 0000000..b3e2f17 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -0,0 +1,31602 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2, 3] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:tinyint, VALUE._col1:tinyint, VALUE._col2:bigint, VALUE._col3:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 1:tinyint) -> tinyint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 2, 3] + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:tinyint, VALUE._col0:tinyint, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-64 62 9173 12288 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(ctinyint) as c1 +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(ctinyint) as c1 +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(ctinyint) as c1 +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(ctinyint) as c1 +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-39856 +PREHOOK: query: EXPLAIN VECTORIZATION +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<count:bigint,sum:double,input:tinyint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(cbigint), max(cbigint), count(cbigint), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 3:bigint) -> bigint, VectorUDAFMaxLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2, 3] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint, VALUE._col3:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 2, 3] + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-2147311592 2145498388 9173 12288 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(cbigint) as c1 +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(cbigint) as c1 +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cbigint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(cbigint) as c1 +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(cbigint) as c1 +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-1698460028409 +PREHOOK: query: EXPLAIN VECTORIZATION +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(cfloat), max(cfloat), count(cfloat), count() + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 4:float) -> float, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2, 3] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [4] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:float, VALUE._col1:float, VALUE._col2:bigint, VALUE._col3:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 0:float) -> float, VectorUDAFMaxDouble(col 1:float) -> float, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 2, 3] + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:float, VALUE._col0:float, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-64.0 79.553 9173 12288 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(cfloat) as c1 +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(cfloat) as c1 +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cfloat) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 4:float) -> double + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [4] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(cfloat) as c1 +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(cfloat) as c1 +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-39479.635992884636 +PREHOOK: query: EXPLAIN VECTORIZATION +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<count:bigint,sum:double,input:float>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesparquet +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesparquet +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +-4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesparquet +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesparquet +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesparquet + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13:decimal(13,3))(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterLongColEqualLongScalar(col 11:boolean, val 1), FilterLongScalarEqualLongColumn(val 3569, col 0:int)(children: col 0:tinyint))) + predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint))) or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float) + outputColumnNames: ctinyint, cbigint, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4] + Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 3:bigint) -> struct<count:bigint,sum:double,input:bigint>, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: var_samp, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(13,3), double] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:bigint>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:bigint>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFMinLong(col 5:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19] + selectExpressions: DoubleColUnaryMinus(col 0:double) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10:double, col 8:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2:double, col 1:double) -> 14:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 16:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0:double) -> 16:double) -> 18:double, LongColUnaryMinus(col 5:tinyint) -> 19:tinyint + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesparquet +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesparquet +#### A masked pattern was here #### +POSTHOOK: query: SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbi
<TRUNCATED>