This is an automated email from the ASF dual-hosted git repository.
rameshkumar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 91e0f1a0401 HIVE-28408: Support ARRAY field access in CBO (Ramesh
Kumar, reviewed by Shohei Okumiya, Stamatis Zampetakis)
91e0f1a0401 is described below
commit 91e0f1a04012f9fc049d3a9557112fdc8830d65d
Author: Ramesh Kumar <[email protected]>
AuthorDate: Tue Jan 14 08:50:52 2025 -0800
HIVE-28408: Support ARRAY field access in CBO (Ramesh Kumar, reviewed by
Shohei Okumiya, Stamatis Zampetakis)
---
.../hive/ql/exec/vector/VectorizationContext.java | 3 +
.../calcite/CalciteSemanticException.java | 2 +-
.../calcite/reloperators/HiveComponentAccess.java | 34 ++++++
.../optimizer/calcite/translator/ASTConverter.java | 8 +-
.../calcite/translator/RexNodeConverter.java | 11 +-
.../hive/ql/parse/type/RexNodeExprFactory.java | 16 ++-
.../queries/clientpositive/nested_column_pruning.q | 1 +
.../vector_orc_nested_column_pruning.q | 16 +++
.../clientpositive/llap/input_testxpath4.q.out | 6 +-
.../llap/nested_column_pruning.q.out | 18 +--
.../llap/orc_nested_column_pruning.q.out | 18 +--
.../llap/vector_orc_nested_column_pruning.q.out | 124 +++++++++++++++++++--
12 files changed, 213 insertions(+), 44 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index a3a24aa9029..289e3f5c480 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1103,6 +1103,9 @@ import com.google.common.annotations.VisibleForTesting;
private int getStructFieldIndex(ExprNodeFieldDesc exprNodeFieldDesc) throws
HiveException {
ExprNodeDesc structNodeDesc = exprNodeFieldDesc.getDesc();
String fieldName = exprNodeFieldDesc.getFieldName();
+ if (exprNodeFieldDesc.getIsList()) {
+ throw new HiveException("Could not vectorize expression with a LIST type without an index");
+ }
StructTypeInfo structTypeInfo = (StructTypeInfo)
structNodeDesc.getTypeInfo();
int index = 0;
boolean isFieldExist = false;
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
index 690899f85e0..7818474e471 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
@@ -33,7 +33,7 @@ public class CalciteSemanticException extends
SemanticException {
Distinct_without_an_aggregation, Duplicates_in_RR,
Having_clause_without_any_groupby, Invalid_column_reference,
Invalid_decimal,
Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions,
- Schema_less_table, Select_alias_in_having_clause, Select_transform,
Subquery,
+ Select_alias_in_having_clause, Select_transform, Subquery,
Table_sample_clauses, UDTF, Unique_join,
HighPrecisionTimestamp // CALCITE-1690
};
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveComponentAccess.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveComponentAccess.java
new file mode 100644
index 00000000000..8573e3d5b0f
--- /dev/null
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveComponentAccess.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+
+/**
+ * Special operator that is used as syntactic sugar to change the type of collection
+ * expressions in order to perform field access over them.
+ */
+public final class HiveComponentAccess {
+ public static final SqlOperator COMPONENT_ACCESS =
+ new SqlFunction("COMPONENT_ACCESS", SqlKind.OTHER_FUNCTION, null,
+ null, null, SqlFunctionCategory.SYSTEM);
+}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 8c6ff8bab0e..f2823dfcc67 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -21,7 +21,6 @@ import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
-import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -73,11 +72,10 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveComponentAccess;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
import
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
@@ -92,6 +90,7 @@ import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.util.DirectionUtils;
import org.apache.hadoop.hive.ql.util.NullOrdering;
@@ -1078,6 +1077,9 @@ public class ASTConverter {
}
// fall-through
default:
+ if (op.equals(HiveComponentAccess.COMPONENT_ACCESS)) {
+ return call.operands.get(0).accept(this);
+ }
for (RexNode operand : call.operands) {
astNodeLst.add(operand.accept(this));
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index e6c41c3b620..31e0d4dee57 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -39,6 +39,7 @@ import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlCastFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.type.ArraySqlType;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.util.ConversionUtil;
@@ -57,6 +58,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveComponentAccess;
import
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
import
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator;
@@ -103,6 +105,7 @@ import java.math.BigDecimal;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Calendar;
+import java.util.Collections;
import java.util.List;
/**
@@ -141,11 +144,11 @@ public class RexNodeConverter {
if (rexNode.getType().isStruct()) {
// regular case of accessing nested field in a column
return rexBuilder.makeFieldAccess(rexNode, fieldDesc.getFieldName(),
true);
+ } else if (rexNode.getType().getComponentType() != null) {
+ return rexBuilder.makeCall(rexNode.getType().getComponentType(), HiveComponentAccess.COMPONENT_ACCESS,
+ Collections.singletonList(rexNode));
} else {
- // This may happen for schema-less tables, where columns are dynamically
- // supplied by serdes.
- throw new CalciteSemanticException("Unexpected rexnode : "
- + rexNode.getClass().getCanonicalName(),
UnsupportedFeature.Schema_less_table);
+ throw new CalciteSemanticException("Unexpected rexnode : " + rexNode.getClass().getCanonicalName());
}
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
index ee8779188d3..8cc75d7e1fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
@@ -23,6 +23,7 @@ import java.nio.charset.Charset;
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -39,12 +40,16 @@ import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlCollation;
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlIntervalQualifier;
import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlQuantifyOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.type.ArraySqlType;
import org.apache.calcite.util.ConversionUtil;
import org.apache.calcite.util.DateString;
import org.apache.calcite.util.NlsString;
@@ -67,6 +72,7 @@ import
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.Unsu
import
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRexExprList;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveComponentAccess;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
@@ -622,11 +628,13 @@ public class RexNodeExprFactory extends
ExprFactory<RexNode> {
if (expr.getType().isStruct()) {
// regular case of accessing nested field in a column
return rexBuilder.makeFieldAccess(expr, fieldName, true);
+ } else if (expr.getType().getComponentType() != null) {
+ RexNode wrap = rexBuilder.makeCall(expr.getType().getComponentType(), HiveComponentAccess.COMPONENT_ACCESS,
+ Collections.singletonList(expr));
+ return createNestedColumnRefExpr(typeInfo, wrap, fieldName,
+ expr.getType().getComponentType() instanceof ArraySqlType);
} else {
- // This may happen for schema-less tables, where columns are dynamically
- // supplied by serdes.
- throw new CalciteSemanticException("Unexpected rexnode : "
- + expr.getClass().getCanonicalName(),
UnsupportedFeature.Schema_less_table);
+ throw new CalciteSemanticException("Unexpected rexnode : " + expr.getClass().getCanonicalName());
}
}
diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q
b/ql/src/test/queries/clientpositive/nested_column_pruning.q
index e3c3e2a8291..00aef84cb0d 100644
--- a/ql/src/test/queries/clientpositive/nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q
@@ -4,6 +4,7 @@ set hive.test.vectorized.execution.enabled.override=none;
set hive.fetch.task.conversion = none;
set hive.strict.checks.cartesian.product=false;
+set hive.cbo.fallback.strategy=NEVER;
-- First, create source tables
DROP TABLE IF EXISTS dummy_n5;
diff --git
a/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
b/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
index 9f76f2921f9..74f74909e5b 100644
--- a/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
@@ -209,6 +209,10 @@ SELECT count(s1.f6), s5.f16.f18.f19
FROM nested_tbl_1
GROUP BY s5.f16.f18.f19;
+EXPLAIN CBO SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19;
+
SELECT count(s1.f6), s5.f16.f18.f19
FROM nested_tbl_1
GROUP BY s5.f16.f18.f19;
@@ -230,3 +234,15 @@ GROUP BY s6['key1'].f20.f21.f22;
SELECT count(s1.f6), s6['key1'].f20.f21.f22
FROM nested_tbl_1
GROUP BY s6['key1'].f20.f21.f22;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1;
+
+EXPLAIN CBO
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1;
+
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1;
+
diff --git a/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out
b/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out
index dec59379b49..1b2eda86326 100644
--- a/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out
+++ b/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out
@@ -27,7 +27,7 @@ STAGE PLANS:
TableScan
alias: src_thrift
Filter Operator
- predicate: (mstringstring['key_9'] is not null and
lintstring.myint is not null and lintstring is not null) (type: boolean)
+ predicate: (lintstring.myint is not null and
mstringstring['key_9'] is not null and lintstring is not null) (type: boolean)
Select Operator
expressions: mstringstring['key_9'] (type: string),
lintstring.myint (type: array<int>)
outputColumnNames: _col0, _col1
@@ -87,9 +87,9 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: src_thrift
- filterExpr: (mstringstring['key_9'] is not null and lintstring.myint
is not null and lintstring is not null) (type: boolean)
+ filterExpr: (lintstring.myint is not null and mstringstring['key_9']
is not null and lintstring is not null) (type: boolean)
Filter Operator
- predicate: (mstringstring['key_9'] is not null and
lintstring.myint is not null and lintstring is not null) (type: boolean)
+ predicate: (lintstring.myint is not null and
mstringstring['key_9'] is not null and lintstring is not null) (type: boolean)
Select Operator
expressions: mstringstring['key_9'] (type: string),
lintstring.myint (type: array<int>)
outputColumnNames: _col0, _col1
diff --git
a/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out
b/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out
index e0990f51a16..499ba134dcc 100644
--- a/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out
@@ -1966,15 +1966,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: nested_tbl_1_n1
- Pruned Column Paths: s1.f6, s5.f16
+ Pruned Column Paths: s5.f16, s1.f6
Statistics: Num rows: 1 Data size: 3196 Basic stats:
COMPLETE Column stats: NONE
Select Operator
- expressions: s1 (type:
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type:
struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>)
- outputColumnNames: s1, s5
+ expressions: s5.f16.f18.f19 (type: array<int>), s1.f6
(type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 3196 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(s1.f6)
- keys: s5.f16.f18.f19 (type: array<int>)
+ aggregations: count(_col1)
+ keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
@@ -2153,12 +2153,12 @@ STAGE PLANS:
Pruned Column Paths: s1.f6
Statistics: Num rows: 1 Data size: 2012 Basic stats:
COMPLETE Column stats: NONE
Select Operator
- expressions: s1 (type:
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type:
map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>)
- outputColumnNames: s1, s6
+ expressions: s6['key1'].f20.f21.f22 (type: array<int>),
s1.f6 (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 2012 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(s1.f6)
- keys: s6['key1'].f20.f21.f22 (type: array<int>)
+ aggregations: count(_col1)
+ keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
diff --git
a/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out
b/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out
index 9be0258bf26..baa308f9c7e 100644
--- a/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out
@@ -1966,15 +1966,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: nested_tbl_1_n0
- Pruned Column Paths: s1.f6, s5.f16
+ Pruned Column Paths: s5.f16, s1.f6
Statistics: Num rows: 1 Data size: 3196 Basic stats:
COMPLETE Column stats: NONE
Select Operator
- expressions: s1 (type:
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type:
struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>)
- outputColumnNames: s1, s5
+ expressions: s5.f16.f18.f19 (type: array<int>), s1.f6
(type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 3196 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(s1.f6)
- keys: s5.f16.f18.f19 (type: array<int>)
+ aggregations: count(_col1)
+ keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
@@ -2153,12 +2153,12 @@ STAGE PLANS:
Pruned Column Paths: s1.f6
Statistics: Num rows: 1 Data size: 2012 Basic stats:
COMPLETE Column stats: NONE
Select Operator
- expressions: s1 (type:
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type:
map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>)
- outputColumnNames: s1, s6
+ expressions: s6['key1'].f20.f21.f22 (type: array<int>),
s1.f6 (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 2012 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(s1.f6)
- keys: s6['key1'].f20.f21.f22 (type: array<int>)
+ aggregations: count(_col1)
+ keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
diff --git
a/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
b/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
index b1618b08544..4fc2aefbc3c 100644
---
a/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
+++
b/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
@@ -2730,15 +2730,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: nested_tbl_1
- Pruned Column Paths: s1.f6, s5.f16
+ Pruned Column Paths: s5.f16, s1.f6
Statistics: Num rows: 1 Data size: 3196 Basic stats:
COMPLETE Column stats: NONE
Select Operator
- expressions: s1 (type:
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type:
struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>)
- outputColumnNames: s1, s5
+ expressions: s5.f16.f18.f19 (type: array<int>), s1.f6
(type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 3196 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(s1.f6)
- keys: s5.f16.f18.f19 (type: array<int>)
+ aggregations: count(_col1)
+ keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
@@ -2756,7 +2756,7 @@ STAGE PLANS:
enabled: true
enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Key expression for GROUPBY operator:
Vectorizing complex type LIST not supported
+ notVectorizedReason: SELECT operator: Could not vectorize
expression with a LIST type without an index
vectorized: false
Reducer 2
Execution mode: llap
@@ -2790,6 +2790,24 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: EXPLAIN CBO SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(_o__c0=[$1], f19=[$0])
+ HiveAggregate(group=[{0}], agg#0=[count($1)])
+ HiveProject($f0=[COMPONENT_ACCESS($5.f16).f18.f19], $f1=[$1.f6])
+ HiveTableScan(table=[[default, nested_tbl_1]],
table:alias=[nested_tbl_1])
+
PREHOOK: query: SELECT count(s1.f6), s5.f16.f18.f19
FROM nested_tbl_1
GROUP BY s5.f16.f18.f19
@@ -2985,12 +3003,12 @@ STAGE PLANS:
Pruned Column Paths: s1.f6
Statistics: Num rows: 1 Data size: 2012 Basic stats:
COMPLETE Column stats: NONE
Select Operator
- expressions: s1 (type:
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type:
map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>)
- outputColumnNames: s1, s6
+ expressions: s6['key1'].f20.f21.f22 (type: array<int>),
s1.f6 (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 2012 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(s1.f6)
- keys: s6['key1'].f20.f21.f22 (type: array<int>)
+ aggregations: count(_col1)
+ keys: _col0 (type: array<int>)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
@@ -3008,7 +3026,7 @@ STAGE PLANS:
enabled: true
enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Key expression for GROUPBY operator:
Vectorizing complex type LIST not supported
+ notVectorizedReason: SELECT operator: Could not vectorize
expression with a LIST type without an index
vectorized: false
Reducer 2
Execution mode: llap
@@ -3055,3 +3073,87 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@nested_tbl_1
#### A masked pattern was here ####
1 [1]
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: nested_tbl_1
+ Pruned Column Paths: s5.f16
+ Statistics: Num rows: 1 Data size: 2880 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s5.f16.f18.f19 (type: array<int>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 2880 Basic stats:
COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2880 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: SELECT operator: Could not vectorize
expression with a LIST type without an index
+ vectorized: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN CBO
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(f19=[COMPONENT_ACCESS($5.f16).f18.f19])
+ HiveTableScan(table=[[default, nested_tbl_1]], table:alias=[nested_tbl_1])
+
+PREHOOK: query: SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+[14,28]