hive git commit: HIVE-15126: Branch-1.2: Fix TestCliDriver.join_merge_multi_expressions.q (Vaibhav Gumashta reviewed by Daniel Dai)
Repository: hive Updated Branches: refs/heads/branch-1.2 8bc7daeb0 -> 07c86120e HIVE-15126: Branch-1.2: Fix TestCliDriver.join_merge_multi_expressions.q (Vaibhav Gumashta reviewed by Daniel Dai) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/07c86120 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/07c86120 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/07c86120 Branch: refs/heads/branch-1.2 Commit: 07c86120e249153475b68f4ca1b40ec62b3ac1a2 Parents: 8bc7dae Author: Vaibhav GumashtaAuthored: Sun Mar 19 13:46:59 2017 -0700 Committer: Vaibhav Gumashta Committed: Sun Mar 19 13:46:59 2017 -0700 -- .../join_merge_multi_expressions.q.out | 46 1 file changed, 29 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/07c86120/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out -- diff --git a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out index a8bd4df..b73643e 100644 --- a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out @@ -21,42 +21,54 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string), hr (type: string) -sort order: ++ -Map-reduce partition columns: key (type: string), hr (type: string) + Select Operator +expressions: key (type: string), hr (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 
10624 Basic stats: COMPLETE Column stats: NONE TableScan -alias: b +alias: a Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string), hr (type: string) -sort order: ++ -Map-reduce partition columns: key (type: string), hr (type: string) + Select Operator +expressions: key (type: string), hr (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan -alias: c +alias: a Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string), hr (type: string) -sort order: ++ -Map-reduce partition columns: key (type: string), hr (type: string) + Select Operator +expressions: key (type: string), hr (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree:
hive git commit: HIVE-16110: Vectorization: Support 2 Value CASE WHEN instead of fall back to VectorUDFAdaptor (Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 7fc60962f -> cd4fcefb8 HIVE-16110: Vectorization: Support 2 Value CASE WHEN instead of fall back to VectorUDFAdaptor (Matt McCline, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cd4fcefb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cd4fcefb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cd4fcefb Branch: refs/heads/master Commit: cd4fcefb83c0151d213ed9ec1a9238ed7fc6b683 Parents: 7fc6096 Author: Matt McClineAuthored: Sun Mar 19 14:30:29 2017 -0500 Committer: Matt McCline Committed: Sun Mar 19 14:30:29 2017 -0500 -- .../ql/exec/vector/VectorizationContext.java| 50 .../vector_groupby_grouping_id3.q | 4 +- .../queries/clientpositive/vectorized_case.q| 18 ++ .../llap/vector_between_columns.q.out | 2 +- .../llap/vector_groupby_grouping_id3.q.out | 122 - .../clientpositive/llap/vectorized_case.q.out | 256 +++ .../clientpositive/spark/vectorized_case.q.out | 252 ++ .../clientpositive/vector_between_columns.q.out | 2 +- .../clientpositive/vectorized_case.q.out| 222 9 files changed, 919 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/cd4fcefb/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 2e27fd5..c3940cb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1461,6 +1461,8 @@ public class VectorizationContext { ve = getBetweenFilterExpression(childExpr, mode, returnType); } else if (udf instanceof GenericUDFIn) { ve = getInExpression(childExpr, mode, returnType); +} else if (udf instanceof GenericUDFWhen) { + ve = getWhenExpression(childExpr, mode, 
returnType); } else if (udf instanceof GenericUDFOPPositive) { ve = getIdentityExpression(childExpr); } else if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFNvl) { @@ -2320,6 +2322,54 @@ public class VectorizationContext { return createVectorExpression(cl, childrenAfterNot, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } + private boolean isColumnOrNonNullConst(ExprNodeDesc exprNodeDesc) { +if (exprNodeDesc instanceof ExprNodeColumnDesc) { + return true; +} +if (exprNodeDesc instanceof ExprNodeConstantDesc) { + String typeString = exprNodeDesc.getTypeString(); + if (!typeString.equalsIgnoreCase("void")) { +return true; + } +} +return false; + } + + private VectorExpression getWhenExpression(List childExpr, + VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { + +if (mode != VectorExpressionDescriptor.Mode.PROJECTION) { + return null; +} +if (childExpr.size() != 3) { + // For now, we only optimize the 2 value case. + return null; +} + +/* + * When we have 2 simple values: + * CASE WHEN boolExpr THEN column | const ELSE column | const END + * then we can convert to:IF (boolExpr THEN column | const ELSE column | const) + */ +// CONSIDER: Adding a version of IfExpr* than can handle a non-column/const expression in the +// THEN or ELSE. +ExprNodeDesc exprNodeDesc1 = childExpr.get(1); +ExprNodeDesc exprNodeDesc2 = childExpr.get(2); +if (isColumnOrNonNullConst(exprNodeDesc1) && +isColumnOrNonNullConst(exprNodeDesc2)) { + // Yes. + GenericUDFIf genericUDFIf = new GenericUDFIf(); + return + getVectorExpressionForUdf( +genericUDFIf, +GenericUDFIf.class, +childExpr, +mode, +returnType); +} +return null; // Not handled by vector classes yet. + } + /* * Return vector expression for a custom (i.e. not built-in) UDF. 
*/ http://git-wip-us.apache.org/repos/asf/hive/blob/cd4fcefb/ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q -- diff --git a/ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q b/ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q index 29e9211..732b2d8 100644 --- a/ql/src/test/queries/clientpositive/vector_groupby_grouping_id3.q +++
[2/5] hive git commit: HIVE-15857: Vectorization: Add string conversion case for UDFToInteger, etc (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out index 526cd21..4e94322 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_primitive.q.out @@ -242,25 +242,49 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part= POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num boolean1boolean1boolean1boolean1 boolean1boolean1boolean1boolean1boolean1 tinyint1tinyint1tinyint1tinyint1tinyint1 tinyint1tinyint1tinyint1tinyint1tinyint1 tinyint1smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1int1int1int1int1int1 int1int1int1int1int1bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -Stage-0 - Fetch Operator -limit:-1 -Stage-1 - Map 1 llap - File Output Operator [FS_2] -Select Operator [SEL_1] (rows=10 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"] - TableScan [TS_0] (rows=10 width=1168) - default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: part_change_various_various_boolean_to_bigint + Statistics: Num rows: 10 Data size: 11688 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator +expressions: insert_num (type: int), part (type: int), c1 
(type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: smallint), c24 (type: smallint), c25
[5/5] hive git commit: HIVE-15857: Vectorization: Add string conversion case for UDFToInteger, etc (Matt McCline, reviewed by Sergey Shelukhin)
HIVE-15857: Vectorization: Add string conversion case for UDFToInteger, etc (Matt McCline, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7fc60962 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7fc60962 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7fc60962 Branch: refs/heads/master Commit: 7fc60962f23c41ad9c245d704cba8fc8c940a884 Parents: 284c8f3 Author: Matt McClineAuthored: Sun Mar 19 10:53:48 2017 -0500 Committer: Matt McCline Committed: Sun Mar 19 10:53:48 2017 -0500 -- .../vector/expressions/CastStringToLong.java| 271 +++ .../apache/hadoop/hive/ql/udf/UDFToBoolean.java | 3 +- .../apache/hadoop/hive/ql/udf/UDFToByte.java| 3 +- .../apache/hadoop/hive/ql/udf/UDFToInteger.java | 3 +- .../apache/hadoop/hive/ql/udf/UDFToLong.java| 3 +- .../apache/hadoop/hive/ql/udf/UDFToShort.java | 3 +- .../clientpositive/schema_evol_orc_acid_part.q | 15 +- .../schema_evol_orc_acid_part_update.q | 3 +- .../clientpositive/schema_evol_orc_acid_table.q | 9 +- .../schema_evol_orc_acid_table_update.q | 3 +- .../schema_evol_orc_acidvec_part.q | 46 +- .../schema_evol_orc_acidvec_part_update.q | 3 +- .../schema_evol_orc_acidvec_table.q | 48 +- .../schema_evol_orc_acidvec_table_update.q | 3 +- .../schema_evol_orc_nonvec_part.q | 22 +- .../schema_evol_orc_nonvec_part_all_complex.q | 10 +- .../schema_evol_orc_nonvec_part_all_primitive.q | 14 +- .../schema_evol_orc_nonvec_table.q | 14 +- .../clientpositive/schema_evol_orc_vec_part.q | 2 +- .../schema_evol_orc_vec_part_all_complex.q | 2 +- .../schema_evol_orc_vec_part_all_primitive.q| 2 +- .../clientpositive/schema_evol_orc_vec_table.q | 2 +- .../schema_evol_text_nonvec_part.q | 22 +- .../schema_evol_text_nonvec_part_all_complex.q | 10 +- ...schema_evol_text_nonvec_part_all_primitive.q | 14 +- .../schema_evol_text_nonvec_table.q | 14 +- .../clientpositive/schema_evol_text_vec_part.q | 2 +- 
.../schema_evol_text_vec_part_all_complex.q | 2 +- .../schema_evol_text_vec_part_all_primitive.q | 2 +- .../clientpositive/schema_evol_text_vec_table.q | 2 +- .../schema_evol_text_vecrow_part.q | 2 +- .../schema_evol_text_vecrow_part_all_complex.q | 2 +- ...schema_evol_text_vecrow_part_all_primitive.q | 2 +- .../schema_evol_text_vecrow_table.q | 2 +- .../llap/schema_evol_orc_acid_part.q.out| 91 +-- .../llap/schema_evol_orc_acid_table.q.out | 11 + .../llap/schema_evol_orc_acidvec_part.q.out | 700 +-- .../llap/schema_evol_orc_acidvec_table.q.out| 686 ++ .../llap/schema_evol_orc_nonvec_part.q.out | 450 ...chema_evol_orc_nonvec_part_all_complex.q.out | 150 ++-- ...ema_evol_orc_nonvec_part_all_primitive.q.out | 250 +-- .../llap/schema_evol_orc_nonvec_table.q.out | 250 +-- .../llap/schema_evol_text_nonvec_part.q.out | 450 ...hema_evol_text_nonvec_part_all_complex.q.out | 150 ++-- ...ma_evol_text_nonvec_part_all_primitive.q.out | 250 +-- .../llap/schema_evol_text_nonvec_table.q.out| 250 +-- .../clientpositive/llap/vector_bucket.q.out | 4 +- .../clientpositive/llap/vector_char_2.q.out | 8 +- .../clientpositive/llap/vector_coalesce_2.q.out | 4 +- .../results/clientpositive/vector_char_2.q.out | 8 +- .../clientpositive/vector_coalesce_2.q.out | 4 +- 51 files changed, 3473 insertions(+), 803 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java new file mode 100644 index 000..5a8a825 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java @@ -0,0 +1,271 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or
[3/5] hive git commit: HIVE-15857: Vectorization: Add string conversion case for UDFToInteger, etc (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/results/clientpositive/llap/schema_evol_orc_acidvec_table.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_acidvec_table.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_acidvec_table.q.out index ce33ed0..5e08bb4 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_acidvec_table.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_acidvec_table.q.out @@ -55,6 +55,55 @@ POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE [(values__tmp__table__1 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 +PREHOOK: query: explain vectorization only detail +select insert_num,a,b,c from table_add_int_permute_select +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only detail +select insert_num,a,b,c from table_add_int_permute_select +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + Vertices: +Map 1 +Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumns: [0, 1, 2, 3] + File Sink Vectorization: + className: VectorFileSinkOperator + native: false +Execution mode: vectorized, llap +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +groupByVectorOutput: true +inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: false +usesVectorUDFAdaptor: false +vectorized: true +rowBatchContext: +dataColumnCount: 4 +includeColumns: [0, 1, 2, 3] +dataColumns: insert_num:int, a:int, b:string, c:int +partitionColumnCount: 0 + + Stage: Stage-0 +Fetch Operator + PREHOOK: query: select insert_num,a,b from table_add_int_permute_select PREHOOK: type: QUERY PREHOOK: Input: default@table_add_int_permute_select @@ -148,6 +197,55 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c EXPRESSION [(values__tm POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 +PREHOOK: query: explain vectorization only detail +select insert_num,a,b,c,d from table_add_int_string_permute_select +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only detail +select insert_num,a,b,c,d from table_add_int_string_permute_select +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + Vertices: +Map 1 +Map Operator Tree: + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumns: [0, 1, 2, 3, 4] + File Sink Vectorization: + className: VectorFileSinkOperator + native: false +Execution mode: vectorized, llap +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +groupByVectorOutput: true +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
+allNative: false +usesVectorUDFAdaptor: false +
[1/5] hive git commit: HIVE-15857: Vectorization: Add string conversion case for UDFToInteger, etc (Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 284c8f3bf -> 7fc60962f http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_primitive.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_primitive.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_primitive.q.out index 03c6936..757ea3a 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_primitive.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_primitive.q.out @@ -242,25 +242,49 @@ POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part= POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).c9 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, type:boolean, comment:null), ] POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num boolean1boolean1boolean1boolean1 boolean1boolean1boolean1boolean1boolean1 tinyint1tinyint1tinyint1tinyint1tinyint1 tinyint1tinyint1tinyint1tinyint1tinyint1 tinyint1smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 smallint1 int1int1int1int1int1int1 int1int1int1int1int1bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain 
vectorization detail select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b from part_change_various_various_boolean_to_bigint POSTHOOK: type: QUERY Explain -Plan optimized by CBO. +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -Stage-0 - Fetch Operator -limit:-1 -Stage-1 - Map 1 llap - File Output Operator [FS_2] -Select Operator [SEL_1] (rows=10 width=4) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"] - TableScan [TS_0] (rows=10 width=512) - default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: part_change_various_various_boolean_to_bigint + Statistics: Num rows: 10 Data size: 5126 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator +expressions: insert_num (type: int), 
part (type: int), c1 (type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 (type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 (type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 (type: tinyint), c21 (type:
[4/5] hive git commit: HIVE-15857: Vectorization: Add string conversion case for UDFToInteger, etc (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q b/ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q index bbf03af..166b34a 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q +++ b/ql/src/test/queries/clientpositive/schema_evol_text_vec_table.q @@ -1,10 +1,10 @@ set hive.explain.user=false; +set hive.fetch.task.conversion=none; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=false; SET hive.vectorized.use.vector.serde.deserialize=true; SET hive.vectorized.use.row.serde.deserialize=false; -set hive.fetch.task.conversion=none; SET hive.vectorized.execution.enabled=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.metastore.disallow.incompatible.col.type.changes=true; http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q index 11df12e..0ab1f33 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q +++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q @@ -1,11 +1,11 @@ set hive.explain.user=false; +set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=false; SET hive.vectorized.use.vector.serde.deserialize=false; SET hive.vectorized.use.row.serde.deserialize=true; -set hive.fetch.task.conversion=none; SET hive.vectorized.execution.enabled=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.metastore.disallow.incompatible.col.type.changes=true; 
http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q index dfd55d9..b4a9d66 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q +++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q @@ -1,11 +1,11 @@ set hive.explain.user=false; +set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=false; SET hive.vectorized.use.vector.serde.deserialize=false; SET hive.vectorized.use.row.serde.deserialize=true; -set hive.fetch.task.conversion=none; SET hive.vectorized.execution.enabled=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.metastore.disallow.incompatible.col.type.changes=false; http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q index d71c6b8..83fc173 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q +++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_primitive.q @@ -1,11 +1,11 @@ set hive.explain.user=false; +set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; set hive.cli.print.header=true; SET hive.exec.schema.evolution=true; SET hive.vectorized.use.vectorized.input.format=false; SET hive.vectorized.use.vector.serde.deserialize=false; SET hive.vectorized.use.row.serde.deserialize=true; -set hive.fetch.task.conversion=none; SET 
hive.vectorized.execution.enabled=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.metastore.disallow.incompatible.col.type.changes=false; http://git-wip-us.apache.org/repos/asf/hive/blob/7fc60962/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q index d4209a5..3059604 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q +++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_table.q @@ -1,10 +1,10 @@ set hive.explain.user=false; +set hive.fetch.task.conversion=none; set
hive git commit: HIVE-16245: Vectorization: Does not handle non-column key expressions in MERGEPARTIAL mode (Matt McCline, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master 47ef02e48 -> 284c8f3bf HIVE-16245: Vectorization: Does not handle non-column key expressions in MERGEPARTIAL mode (Matt McCline, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/284c8f3b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/284c8f3b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/284c8f3b Branch: refs/heads/master Commit: 284c8f3bfccc227d4858870aee4f41c0c6df4f82 Parents: 47ef02e Author: Matt McClineAuthored: Sun Mar 19 02:25:34 2017 -0500 Committer: Matt McCline Committed: Sun Mar 19 02:25:34 2017 -0500 -- .../hive/ql/optimizer/physical/Vectorizer.java | 11 ++ .../hive/ql/exec/vector/VectorizedRowBatch.java | 37 .../ql/exec/vector/TestStructColumnVector.java | 3 +- 3 files changed, 50 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/284c8f3b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 2b3eab4..32ec1d7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -2016,6 +2016,17 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } +if (processingMode == ProcessingMode.MERGE_PARTIAL) { + // For now, VectorGroupByOperator ProcessingModeReduceMergePartial cannot handle key + // expressions. 
+ for (ExprNodeDesc keyExpr : desc.getKeys()) { +if (!(keyExpr instanceof ExprNodeColumnDesc)) { + setExpressionIssue("Key", "Non-column key expressions not supported for MERGEPARTIAL"); + return false; +} + } +} + Pair retPair = validateAggregationDescs(desc.getAggregators(), processingMode, hasKeys); if (!retPair.left) { http://git-wip-us.apache.org/repos/asf/hive/blob/284c8f3b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java -- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 0235ffc..278865f 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -137,6 +137,43 @@ public class VectorizedRowBatch implements Writable { return ""; } StringBuilder b = new StringBuilder(); +b.append("Column vector types: "); +for (int k = 0; k < projectionSize; k++) { + int projIndex = projectedColumns[k]; + ColumnVector cv = cols[projIndex]; + if (k > 0) { +b.append(", "); + } + b.append(projIndex); + b.append(":"); + String colVectorType = null; + if (cv instanceof LongColumnVector) { +colVectorType = "LONG"; + } else if (cv instanceof DoubleColumnVector) { +colVectorType = "DOUBLE"; + } else if (cv instanceof BytesColumnVector) { +colVectorType = "BYTES"; + } else if (cv instanceof DecimalColumnVector) { +colVectorType = "DECIMAL"; + } else if (cv instanceof TimestampColumnVector) { +colVectorType = "TIMESTAMP"; + } else if (cv instanceof IntervalDayTimeColumnVector) { +colVectorType = "INTERVAL_DAY_TIME"; + } else if (cv instanceof ListColumnVector) { +colVectorType = "LIST"; + } else if (cv instanceof MapColumnVector) { +colVectorType = "MAP"; + } else if (cv instanceof StructColumnVector) { +colVectorType = "STRUCT"; + } else if (cv instanceof UnionColumnVector) { +colVectorType 
= "UNION"; + } else { +colVectorType = "Unknown"; + } + b.append(colVectorType); +} +b.append('\n'); + if (this.selectedInUse) { for (int j = 0; j < size; j++) { int i = selected[j]; http://git-wip-us.apache.org/repos/asf/hive/blob/284c8f3b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java -- diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestStructColumnVector.java index 9ac7ba0..c175ed0
[2/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out new file mode 100644 index 000..91d9e88 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -0,0 +1,568 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, 
type:string, comment:null), ] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube LIMIT 10 +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: a (type: string), b (type: string) +outputColumnNames: a, b +Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) +sort order: +++ +Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) +Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE +TopN Hash Memory Usage: 0.1 +value expressions: _col3 (type: bigint) +Execution mode: vectorized, llap +LLAP IO: all inputs +Reducer 2 +Execution mode: vectorized, llap +Reduce Operator Tree: + Group By Operator +aggregations: count(VALUE._col0) +keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) +mode: mergepartial +outputColumnNames: _col0, _col1, _col3 +Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE +pruneGroupingSetId: true +Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE + Limit +Number of rows: 10 +
[3/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out new file mode 100644 index 000..0175c38 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -0,0 +1,554 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), 
] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * FROM +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq1 +join +(SELECT a, b, count(*) from T1 where a < 3 group by a, b with cube) subq2 +on subq1.a = subq2.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: (UDFToDouble(a) < 3.0) (type: boolean) +Statistics: Num rows: 2 Data size: 510 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) +sort order: +++ +Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) +Statistics: Num rows: 8 Data size: 2040 Basic stats: COMPLETE Column stats: NONE +value expressions: _col3 (type: bigint) +Execution mode: vectorized, llap +LLAP IO: all inputs +Map 4 +Map Operator Tree: +TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: (UDFToDouble(a) < 3.0) (type: boolean) +Statistics: Num rows: 
2 Data size: 510 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash +
[6/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/queries/clientpositive/groupby_grouping_id1.q -- diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q index d43ea37..9948ce9 100644 --- a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q +++ b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q @@ -2,6 +2,8 @@ CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +-- SORT_QUERY_RESULTS + SELECT key, val, GROUPING__ID from T1 group by key, val with cube; SELECT key, val, GROUPING__ID from T1 group by cube(key, val); http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/queries/clientpositive/groupby_grouping_id2.q -- diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q index 77a1638..cc7f9e4 100644 --- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q +++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q @@ -1,3 +1,7 @@ +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/queries/clientpositive/groupby_grouping_id3.q -- diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q index c6746a8..955dbe0 100644 --- a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q +++ b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q @@ -6,6 +6,12 @@ set hive.cbo.enable = false; -- SORT_QUERY_RESULTS +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; SELECT key, value, 
GROUPING__ID, count(*) FROM T1 GROUP BY key, value @@ -14,6 +20,12 @@ HAVING GROUPING__ID = 1; set hive.cbo.enable = true; +EXPLAIN +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; SELECT key, value, GROUPING__ID, count(*) FROM T1 GROUP BY key, value http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q -- diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q index e239a87..c22c97f 100644 --- a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q +++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q @@ -1,19 +1,39 @@ +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; + +-- SORT_QUERY_RESULTS + CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1; SELECT * FROM T1; +EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube; SELECT a, b, count(*) from T1 group by a, b with cube; +EXPLAIN +SELECT a, b, count(*) from T1 group by cube(a, b); SELECT a, b, count(*) from T1 group by cube(a, b); +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b), b, ()); +EXPLAIN +SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); SELECT a, b, count(*) FROM T1 GROUP BY a, b GROUPING SETS (a, (a, b)); +EXPLAIN +SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); SELECT a FROM T1 GROUP BY a, b, c GROUPING SETS (a, b, c); +EXPLAIN +SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); SELECT a FROM T1 GROUP BY a GROUPING SETS ((a), (a)); +EXPLAIN +SELECT a + b, count(*) FROM T1 GROUP BY a + b GROUPING SETS (a+b); SELECT a + b, count(*) FROM 
T1 GROUP BY a + b GROUPING SETS (a+b); http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q -- diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q index b470964..90e6325 100644 --- a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q +++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q @@ -1,6 +1,10 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; set
[7/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)
HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/47ef02e4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/47ef02e4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/47ef02e4 Branch: refs/heads/master Commit: 47ef02e4877690cd6e5a4ef5f53cb55c41b8803e Parents: 62ea542 Author: Matt McClineAuthored: Sun Mar 19 01:54:08 2017 -0500 Committer: Matt McCline Committed: Sun Mar 19 01:54:08 2017 -0500 -- .../test/resources/testconfiguration.properties | 14 + .../ql/exec/vector/VectorColumnSetInfo.java | 158 ++-- .../ql/exec/vector/VectorGroupByOperator.java | 144 +++- .../ql/exec/vector/VectorGroupKeyHelper.java| 15 +- .../ql/exec/vector/VectorHashKeyWrapper.java| 200 +++-- .../exec/vector/VectorHashKeyWrapperBatch.java | 445 +++--- .../ql/exec/vector/VectorMapJoinOperator.java | 3 + .../exec/vector/VectorSMBMapJoinOperator.java | 3 + .../hive/ql/optimizer/physical/Vectorizer.java | 19 +- .../clientpositive/groupby_grouping_id1.q | 2 + .../clientpositive/groupby_grouping_id2.q | 4 + .../clientpositive/groupby_grouping_id3.q | 12 + .../clientpositive/groupby_grouping_sets1.q | 20 + .../clientpositive/groupby_grouping_sets2.q | 4 + .../clientpositive/groupby_grouping_sets3.q | 6 + .../clientpositive/groupby_grouping_sets4.q | 1 + .../clientpositive/groupby_grouping_sets5.q | 2 + .../clientpositive/groupby_grouping_sets6.q | 2 + .../groupby_grouping_sets_grouping.q| 3 + .../groupby_grouping_sets_limit.q | 3 + .../clientpositive/vector_groupby_cube1.q | 55 ++ .../vector_groupby_grouping_id1.q | 23 + .../vector_groupby_grouping_id2.q | 65 ++ .../vector_groupby_grouping_id3.q | 42 + .../vector_groupby_grouping_sets1.q | 43 + .../vector_groupby_grouping_sets2.q | 36 + .../vector_groupby_grouping_sets3.q | 40 + .../vector_groupby_grouping_sets4.q | 57 ++ 
.../vector_groupby_grouping_sets5.q | 39 + .../vector_groupby_grouping_sets6.q | 38 + .../vector_groupby_grouping_sets_grouping.q | 99 +++ .../vector_groupby_grouping_sets_limit.q| 43 + .../vector_groupby_grouping_window.q| 21 + .../clientpositive/vector_groupby_rollup1.q | 54 ++ .../clientpositive/groupby_grouping_id1.q.out | 120 +-- .../clientpositive/groupby_grouping_id3.q.out | 139 +++ .../clientpositive/groupby_grouping_sets1.q.out | 496 ++- .../clientpositive/groupby_grouping_sets2.q.out | 62 +- .../clientpositive/groupby_grouping_sets3.q.out | 41 +- .../clientpositive/groupby_grouping_sets5.q.out | 36 +- .../clientpositive/groupby_grouping_sets6.q.out | 4 +- .../groupby_grouping_sets_grouping.q.out| 104 +-- .../groupby_grouping_sets_limit.q.out | 34 +- .../llap/groupby_grouping_id2.q.out | 9 + .../llap/vector_empty_where.q.out | 16 +- .../llap/vector_groupby_cube1.q.out | 773 + .../llap/vector_groupby_grouping_id1.q.out | 179 .../llap/vector_groupby_grouping_id2.q.out | 359 .../llap/vector_groupby_grouping_id3.q.out | 232 + .../llap/vector_groupby_grouping_sets1.q.out| 668 +++ .../llap/vector_groupby_grouping_sets2.q.out| 469 +++ .../llap/vector_groupby_grouping_sets3.q.out| 314 +++ .../llap/vector_groupby_grouping_sets4.q.out| 554 .../llap/vector_groupby_grouping_sets5.q.out| 371 .../llap/vector_groupby_grouping_sets6.q.out| 192 + .../vector_groupby_grouping_sets_grouping.q.out | 838 +++ .../vector_groupby_grouping_sets_limit.q.out| 568 + .../llap/vector_groupby_grouping_window.q.out | 157 .../llap/vector_groupby_rollup1.q.out | 610 ++ .../llap/vector_grouping_sets.q.out | 69 +- .../llap/vector_non_string_partition.q.out | 144 +++- .../llap/vector_tablesample_rows.q.out | 2 +- .../clientpositive/llap/vectorization_15.q.out | 6 +- .../llap/vectorization_div0.q.out | 414 + .../llap/vectorization_limit.q.out | 470 +-- .../llap/vectorization_offset_limit.q.out | 2 +- .../llap/vectorized_mapjoin2.q.out | 4 +- .../spark/groupby_grouping_id2.q.out| 9 + 
.../clientpositive/spark/vectorization_15.q.out | 6 +- .../clientpositive/vector_grouping_sets.q.out | 51 +- .../clientpositive/vectorization_15.q.out | 6 +- 71 files
[4/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out new file mode 100644 index 000..5d34347 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -0,0 +1,668 @@ +PREHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1_text +POSTHOOK: query: CREATE TABLE T1_text(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1_text +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@t1_text +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1_text +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@t1_text +PREHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@t1_text +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1 STORED AS ORC AS SELECT * FROM T1_text +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@t1_text +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +POSTHOOK: Lineage: t1.a SIMPLE [(t1_text)t1_text.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: t1.b SIMPLE [(t1_text)t1_text.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: t1.c SIMPLE [(t1_text)t1_text.FieldSchema(name:c, type:string, comment:null), 
] +t1_text.a t1_text.b t1_text.c +PREHOOK: query: SELECT * FROM T1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 + A masked pattern was here +POSTHOOK: query: SELECT * FROM T1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 + A masked pattern was here +t1.a t1.bt1.c +1 1 3 +2 2 4 +2 3 5 +3 2 8 +5 2 2 +8 1 1 +PREHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT a, b, count(*) from T1 group by a, b with cube +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: t1 + Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: a (type: string), b (type: string) +outputColumnNames: a, b +Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: NONE +Group By Operator + aggregations: count() + keys: a (type: string), b (type: string), 0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) +sort order: +++ +Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) +Statistics: Num rows: 24 Data size: 6120 Basic stats: COMPLETE Column stats: NONE +value expressions: _col3 (type: bigint) +Execution mode: vectorized, llap +LLAP IO: all inputs +Reducer 2 +Execution mode: vectorized, llap +Reduce Operator Tree: + Group By Operator +aggregations: count(VALUE._col0) +keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) +mode: mergepartial +outputColumnNames: _col0, _col1, _col3 +Statistics: Num rows: 
12 Data size: 3060 Basic stats: COMPLETE Column stats: NONE +pruneGroupingSetId: true +Select Operator + expressions: _col0 (type:
[5/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out b/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out index 9f93f86..ed9d551 100644 --- a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out @@ -62,7 +62,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -111,7 +111,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) @@ -218,7 +218,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2888 Data size: 8628 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -267,7 +267,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) @@ -382,7 +382,7 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator 
native: true -
[1/7] hive git commit: HIVE-14016 : Vectorization: Add support for Grouping Sets (Matt McCline, reviewed by Gopal Vijayaraghavan, Jesus Camacho Rodriguez)
Repository: hive Updated Branches: refs/heads/master 62ea542e2 -> 47ef02e48 http://git-wip-us.apache.org/repos/asf/hive/blob/47ef02e4/ql/src/test/results/clientpositive/llap/vectorization_div0.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_div0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_div0.q.out index 2b61979..37d05c8 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_div0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_div0.q.out @@ -1,27 +1,71 @@ -PREHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +PREHOOK: query: explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY -POSTHOOK: query: -- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants -explain +POSTHOOK: query: explain vectorization expression select cdouble / 0.0 from alltypesorc limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator +expressions: (cdouble / 0.0) (type: double) +outputColumnNames: _col0 +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumns: [12] +selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double +Statistics: Num rows: 12288 Data size: 98304 Basic stats: COMPLETE Column stats: COMPLETE +Limit + Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + 
native: true + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator +compressed: false +File Sink Vectorization: +className: VectorFileSinkOperator +native: false +Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +Execution mode: vectorized, llap +LLAP IO: all inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +groupByVectorOutput: true +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: false +usesVectorUDFAdaptor: false +vectorized: true + Stage: Stage-0 Fetch Operator limit: 100 Processor Tree: -TableScan - alias: alltypesorc - Select Operator -expressions: (cdouble / 0.0) (type: double) -outputColumnNames: _col0 -Limit - Number of rows: 100 - ListSink +ListSink PREHOOK: query: select cdouble / 0.0 from alltypesorc limit 100 PREHOOK: type: QUERY @@ -131,20 +175,18 @@ NULL NULL NULL NULL -PREHOOK: query: -- There are no zeros in the table, but there is 98, so use it as zero - --- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +PREHOOK: query: explain vectorization expression select (cbigint - 98L) as s1, cdouble / (cbigint - 98L) as s2, 1.2 / (cbigint - 98L) from alltypesorc where cbigint > 0 and cbigint < 1 order by s1, s2 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: -- There are no zeros in the table, but there is 98, so use it as zero - --- TODO: add more stuff here after HIVE-5918 is fixed, such as cbigint and constants as numerators -explain +POSTHOOK: query: explain vectorization
hive git commit: Revert "HIVE-15126: Branch-1.2: Fix TestCliDriver.join_merge_multi_expressions.q (Vaibhav Gumashta reviewed by Daniel Dai)"
Repository: hive Updated Branches: refs/heads/branch-1.2 2efcf9a31 -> 8bc7daeb0 Revert "HIVE-15126: Branch-1.2: Fix TestCliDriver.join_merge_multi_expressions.q (Vaibhav Gumashta reviewed by Daniel Dai)" This reverts commit 2efcf9a31fc3214a04745ae2352dfa17ae2dc0c5. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8bc7daeb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8bc7daeb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8bc7daeb Branch: refs/heads/branch-1.2 Commit: 8bc7daeb04ee3b5d1453bccd2ed6c43706eed854 Parents: 2efcf9a Author: Vaibhav Gumashta Authored: Sat Mar 18 23:43:35 2017 -0700 Committer: Vaibhav Gumashta Committed: Sat Mar 18 23:43:35 2017 -0700 -- .../join_merge_multi_expressions.q.out | 46 1 file changed, 17 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8bc7daeb/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out -- diff --git a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out index b73643e..a8bd4df 100644 --- a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out @@ -21,54 +21,42 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: key (type: string), hr (type: string) -outputColumnNames: _col0, _col1 + Reduce Output Operator +key expressions: key (type: string), hr (type: string) +sort order: ++ +Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan -alias: a +alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: key (type: string), hr (type: string) -outputColumnNames: _col0, _col1 + Reduce Output Operator +key expressions: key (type: string), hr (type: string) +sort order: ++ +Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan -alias: a +alias: c Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator -expressions: key (type: string), hr (type: string) -outputColumnNames: _col0, _col1 + Reduce Output Operator +key expressions: key (type: string), hr (type: string) +sort order: ++ +Map-reduce partition columns: key (type: string), hr (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE -Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size:
hive git commit: HIVE-15126: Branch-1.2: Fix TestCliDriver.join_merge_multi_expressions.q (Vaibhav Gumashta reviewed by Daniel Dai)
Repository: hive Updated Branches: refs/heads/branch-1.2 d3b88022a -> 2efcf9a31 HIVE-15126: Branch-1.2: Fix TestCliDriver.join_merge_multi_expressions.q (Vaibhav Gumashta reviewed by Daniel Dai) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2efcf9a3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2efcf9a3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2efcf9a3 Branch: refs/heads/branch-1.2 Commit: 2efcf9a31fc3214a04745ae2352dfa17ae2dc0c5 Parents: d3b8802 Author: Vaibhav Gumashta Authored: Sat Mar 18 23:41:27 2017 -0700 Committer: Vaibhav Gumashta Committed: Sat Mar 18 23:41:27 2017 -0700 -- .../join_merge_multi_expressions.q.out | 46 1 file changed, 29 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/2efcf9a3/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out -- diff --git a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out index a8bd4df..b73643e 100644 --- a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out @@ -21,42 +21,54 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string), hr (type: string) -sort order: ++ -Map-reduce partition columns: key (type: string), hr (type: string) + Select Operator +expressions: key (type: string), hr (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 
10624 Basic stats: COMPLETE Column stats: NONE TableScan -alias: b +alias: a Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string), hr (type: string) -sort order: ++ -Map-reduce partition columns: key (type: string), hr (type: string) + Select Operator +expressions: key (type: string), hr (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TableScan -alias: c +alias: a Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator -key expressions: key (type: string), hr (type: string) -sort order: ++ -Map-reduce partition columns: key (type: string), hr (type: string) + Select Operator +expressions: key (type: string), hr (type: string) +outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: