Repository: hive Updated Branches: refs/heads/master cb866e894 -> 3f7127f59
HIVE-18521: Vectorization: query failing in reducer VectorUDAFAvgDecimalPartial2 java.lang.ClassCastException StructTypeInfo --> DecimalTypeInfo (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f7127f5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f7127f5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f7127f5 Branch: refs/heads/master Commit: 3f7127f59b462667da42204133fbcd8ce22e743e Parents: cb866e8 Author: Matt McCline <mmccl...@hortonworks.com> Authored: Thu Jan 25 04:39:42 2018 -0600 Committer: Matt McCline <mmccl...@hortonworks.com> Committed: Thu Jan 25 04:41:04 2018 -0600 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../UDAFTemplates/VectorUDAFAvgDecimalMerge.txt | 6 +- .../vector_groupby_grouping_sets3_dec.q | 40 ++ .../vector_groupby_grouping_sets3_dec.q.out | 545 +++++++++++++++++++ 4 files changed, 590 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3f7127f5/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 1017249..1362079 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -687,6 +687,7 @@ minillaplocal.query.files=\ vector_groupby_grouping_sets1.q,\ vector_groupby_grouping_sets2.q,\ vector_groupby_grouping_sets3.q,\ + vector_groupby_grouping_sets3_dec.q,\ vector_groupby_grouping_sets4.q,\ vector_groupby_grouping_sets5.q,\ vector_groupby_grouping_sets6.q,\ 
http://git-wip-us.apache.org/repos/asf/hive/blob/3f7127f5/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt index 5d3e422..8ab393c 100644 --- a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt +++ b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt @@ -97,7 +97,7 @@ public class <ClassName> extends VectorAggregateExpression { #IF FINAL transient private HiveDecimalWritable tempDecWritable; -#ENDIF FINAL + DecimalTypeInfo outputDecimalTypeInfo; @@ -110,6 +110,7 @@ public class <ClassName> extends VectorAggregateExpression { * The precision of the SUM in the partial output */ private int sumPrecision; +#ENDIF FINAL // This constructor is used to momentarily create the object so match can be called. public <ClassName>() { @@ -128,10 +129,11 @@ public class <ClassName> extends VectorAggregateExpression { } private void init() { +#IF FINAL outputDecimalTypeInfo = (DecimalTypeInfo) outputTypeInfo; sumScale = outputDecimalTypeInfo.scale(); sumPrecision = outputDecimalTypeInfo.precision(); -#IF FINAL + tempDecWritable = new HiveDecimalWritable(); #ENDIF FINAL } http://git-wip-us.apache.org/repos/asf/hive/blob/3f7127f5/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3_dec.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3_dec.q b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3_dec.q new file mode 100644 index 0000000..1dff14c --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets3_dec.q @@ -0,0 +1,40 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set 
hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + +-- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b, +-- with a different number of rows for a and b in each file. Since BucketizedHiveInputFormat is used, +-- this tests that the aggregate function stores the partial aggregate state correctly even if an +-- additional MR job is created for processing the grouping sets. +CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text; +LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text; + +CREATE TABLE T1 STORED AS ORC AS SELECT a, b, cast(c as decimal(10,2)) as c_dec FROM T1_text; + +set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.new.job.grouping.set.cardinality = 30; + +-- The query below will execute in a single MR job, since 4 rows are generated per input row +-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null)) and +-- hive.new.job.grouping.set.cardinality is more than 4. +EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube; + +EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by cube(a, b); +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube; + +set hive.new.job.grouping.set.cardinality=2; + +-- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2. +-- The partial aggregation state should be maintained correctly across MR jobs. 
+EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube; +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube; + http://git-wip-us.apache.org/repos/asf/hive/blob/3f7127f5/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out new file mode 100644 index 0000000..d98ce9b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -0,0 +1,545 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT a, 
b, cast(c as decimal(10,2)) as c_dec FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT a, b, cast(c as decimal(10,2)) as c_dec FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c_dec EXPRESSION [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), ] +a b c_dec +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c_dec:decimal(10,2), 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: a (type: string), b (type: string), c_dec (type: decimal(10,2)) + outputColumnNames: a, b, c_dec + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 12 Data size: 
5760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c_dec), count() + Group By Vectorization: + aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4] + Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>), _col4 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: a:string, b:string, 
c_dec:decimal(10,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int, VALUE._col0:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimalFinal(col 3:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1] + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by cube(a, b) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by cube(a, b) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c_dec:decimal(10,2), 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: a (type: string), b (type: string), c_dec (type: decimal(10,2)) + outputColumnNames: a, b, c_dec + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c_dec), count() + Group By Vectorization: + aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:int + native: false + 
vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4] + Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>), _col4 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: a:string, b:string, c_dec:decimal(10,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 5 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int, VALUE._col0:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimalFinal(col 3:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1] + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 
+#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.000000 2 +1 2 2.000000 1 +1 NULL 2.666667 3 +2 2 5.333333 3 +2 3 5.000000 2 +2 NULL 5.200000 5 +3 2 8.000000 1 +3 NULL 8.000000 1 +5 1 2.000000 1 +5 NULL 2.000000 1 +8 1 1.000000 2 +8 NULL 1.000000 2 +NULL 1 2.000000 5 +NULL 2 5.200000 5 +NULL 3 5.000000 2 +NULL NULL 3.833333 12 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:a:string, 1:b:string, 2:c_dec:decimal(10,2), 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: a (type: string), b (type: string), c_dec (type: decimal(10,2)) + outputColumnNames: a, b, c_dec + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(c_dec), count() + Group By Vectorization: + aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> 
struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + keys: a (type: string), b (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2, 3] + Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>), _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: a:string, b:string, c_dec:decimal(10,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimalPartial2(col 2:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>) -> struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: PARTIALS + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:int + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0, 1] + keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4] + Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>), _col4 (type: bigint) + Reducer 3 + 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:int, VALUE._col0:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimalFinal(col 3:struct<count:bigint,sum:decimal(20,2),input:decimal(10,2)>) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: FINAL + keyExpressions: col 0:string, col 1:string, col 2:int + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0, 1] + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +a b _c2 _c3 +1 1 3.000000 2 +1 2 2.000000 1 +1 NULL 2.666667 3 +2 2 5.333333 3 +2 3 5.000000 2 +2 NULL 5.200000 5 +3 2 8.000000 1 +3 NULL 8.000000 1 +5 1 2.000000 1 +5 NULL 2.000000 1 +8 1 1.000000 2 +8 NULL 1.000000 2 +NULL 1 2.000000 5 +NULL 2 5.200000 5 +NULL 3 5.000000 2 +NULL NULL 3.833333 12