(hive) branch master updated: HIVE-28408: Support ARRAY field access in CBO (Ramesh Kumar, reviewed by Shohei Okumiya, Stamatis Zampetakis)

rameshkumar Tue, 14 Jan 2025 08:51:10 -0800

This is an automated email from the ASF dual-hosted git repository.

rameshkumar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new 91e0f1a0401 HIVE-28408: Support ARRAY field access in CBO (Ramesh Kumar, reviewed by Shohei Okumiya, Stamatis Zampetakis)
91e0f1a0401 is described below

commit 91e0f1a04012f9fc049d3a9557112fdc8830d65d
Author: Ramesh Kumar <[email protected]>
AuthorDate: Tue Jan 14 08:50:52 2025 -0800

    HIVE-28408: Support ARRAY field access in CBO (Ramesh Kumar, reviewed by Shohei Okumiya, Stamatis Zampetakis)
---
 .../hive/ql/exec/vector/VectorizationContext.java  |   3 +
 .../calcite/CalciteSemanticException.java          |   2 +-
 .../calcite/reloperators/HiveComponentAccess.java  |  34 ++++++
 .../optimizer/calcite/translator/ASTConverter.java |   8 +-
 .../calcite/translator/RexNodeConverter.java       |  11 +-
 .../hive/ql/parse/type/RexNodeExprFactory.java     |  16 ++-
 .../queries/clientpositive/nested_column_pruning.q |   1 +
 .../vector_orc_nested_column_pruning.q             |  16 +++
 .../clientpositive/llap/input_testxpath4.q.out     |   6 +-
 .../llap/nested_column_pruning.q.out               |  18 +--
 .../llap/orc_nested_column_pruning.q.out           |  18 +--
 .../llap/vector_orc_nested_column_pruning.q.out    | 124 +++++++++++++++++++--
 12 files changed, 213 insertions(+), 44 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index a3a24aa9029..289e3f5c480 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1103,6 +1103,9 @@ import com.google.common.annotations.VisibleForTesting;
   private int getStructFieldIndex(ExprNodeFieldDesc exprNodeFieldDesc) throws 
HiveException {
     ExprNodeDesc structNodeDesc = exprNodeFieldDesc.getDesc();
     String fieldName = exprNodeFieldDesc.getFieldName();
+    if (exprNodeFieldDesc.getIsList()) {
+      throw new HiveException("Could not vectorize expression with a LIST type 
without an index");
+    }
     StructTypeInfo structTypeInfo = (StructTypeInfo) 
structNodeDesc.getTypeInfo();
     int index = 0;
     boolean isFieldExist = false;
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
index 690899f85e0..7818474e471 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
@@ -33,7 +33,7 @@ public class CalciteSemanticException extends 
SemanticException {
     Distinct_without_an_aggregation, Duplicates_in_RR,
     Having_clause_without_any_groupby, Invalid_column_reference, 
Invalid_decimal,
     Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions,
-    Schema_less_table, Select_alias_in_having_clause, Select_transform, 
Subquery,
+    Select_alias_in_having_clause, Select_transform, Subquery,
     Table_sample_clauses, UDTF, Unique_join,
     HighPrecisionTimestamp // CALCITE-1690
   };
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveComponentAccess.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveComponentAccess.java
new file mode 100644
index 00000000000..8573e3d5b0f
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveComponentAccess.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
+
+/**
+ * Special operator that is used as syntactic sugar to change the type of collection
+ * expressions in order to perform field access over them.
+ */
+public final class HiveComponentAccess {
+    public static final SqlOperator COMPONENT_ACCESS =
+        new SqlFunction("COMPONENT_ACCESS", SqlKind.OTHER_FUNCTION, null,
+                null, null, SqlFunctionCategory.SYSTEM);
+}
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 8c6ff8bab0e..f2823dfcc67 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -21,7 +21,6 @@ import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
@@ -73,11 +72,10 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveComponentAccess;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveValues;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
@@ -92,6 +90,7 @@ import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
 import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.type.RexNodeExprFactory;
 import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
 import org.apache.hadoop.hive.ql.util.DirectionUtils;
 import org.apache.hadoop.hive.ql.util.NullOrdering;
@@ -1078,6 +1077,9 @@ public class ASTConverter {
         }
         // fall-through
       default:
+        if (op.equals(HiveComponentAccess.COMPONENT_ACCESS)) {
+          return call.operands.get(0).accept(this);
+        }
         for (RexNode operand : call.operands) {
           astNodeLst.add(operand.accept(this));
         }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index e6c41c3b620..31e0d4dee57 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -39,6 +39,7 @@ import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlCastFunction;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.type.ArraySqlType;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.sql.type.SqlTypeUtil;
 import org.apache.calcite.util.ConversionUtil;
@@ -57,6 +58,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveComponentAccess;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator;
@@ -103,6 +105,7 @@ import java.math.BigDecimal;
 import java.time.Instant;
 import java.util.ArrayList;
 import java.util.Calendar;
+import java.util.Collections;
 import java.util.List;
 
 /**
@@ -141,11 +144,11 @@ public class RexNodeConverter {
     if (rexNode.getType().isStruct()) {
       // regular case of accessing nested field in a column
       return rexBuilder.makeFieldAccess(rexNode, fieldDesc.getFieldName(), 
true);
+    } else if (rexNode.getType().getComponentType() != null) {
+      return rexBuilder.makeCall(rexNode.getType().getComponentType(), 
HiveComponentAccess.COMPONENT_ACCESS,
+              Collections.singletonList(rexNode));
     } else {
-      // This may happen for schema-less tables, where columns are dynamically
-      // supplied by serdes.
-      throw new CalciteSemanticException("Unexpected rexnode : "
-          + rexNode.getClass().getCanonicalName(), 
UnsupportedFeature.Schema_less_table);
+      throw new CalciteSemanticException("Unexpected rexnode : " + 
rexNode.getClass().getCanonicalName());
     }
   }
 
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
index ee8779188d3..8cc75d7e1fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
@@ -23,6 +23,7 @@ import java.nio.charset.Charset;
 import java.time.Instant;
 import java.time.ZoneId;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -39,12 +40,16 @@ import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexSubQuery;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlCollation;
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
 import org.apache.calcite.sql.SqlIntervalQualifier;
 import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlQuantifyOperator;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.type.ArraySqlType;
 import org.apache.calcite.util.ConversionUtil;
 import org.apache.calcite.util.DateString;
 import org.apache.calcite.util.NlsString;
@@ -67,6 +72,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.Unsu
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRexExprList;
+import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveComponentAccess;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
@@ -622,11 +628,13 @@ public class RexNodeExprFactory extends 
ExprFactory<RexNode> {
     if (expr.getType().isStruct()) {
       // regular case of accessing nested field in a column
       return rexBuilder.makeFieldAccess(expr, fieldName, true);
+    } else if (expr.getType().getComponentType() != null) {
+      RexNode wrap = rexBuilder.makeCall(expr.getType().getComponentType(), 
HiveComponentAccess.COMPONENT_ACCESS,
+                  Collections.singletonList(expr));
+      return createNestedColumnRefExpr(typeInfo, wrap, fieldName,
+              expr.getType().getComponentType() instanceof ArraySqlType);
     } else {
-      // This may happen for schema-less tables, where columns are dynamically
-      // supplied by serdes.
-      throw new CalciteSemanticException("Unexpected rexnode : "
-          + expr.getClass().getCanonicalName(), 
UnsupportedFeature.Schema_less_table);
+      throw new CalciteSemanticException("Unexpected rexnode : " + 
expr.getClass().getCanonicalName());
     }
   }
 
diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q 
b/ql/src/test/queries/clientpositive/nested_column_pruning.q
index e3c3e2a8291..00aef84cb0d 100644
--- a/ql/src/test/queries/clientpositive/nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q
@@ -4,6 +4,7 @@ set hive.test.vectorized.execution.enabled.override=none;
 
 set hive.fetch.task.conversion = none;
 set hive.strict.checks.cartesian.product=false;
+set hive.cbo.fallback.strategy=NEVER;
 
 -- First, create source tables
 DROP TABLE IF EXISTS dummy_n5;
diff --git 
a/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q 
b/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
index 9f76f2921f9..74f74909e5b 100644
--- a/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q
@@ -209,6 +209,10 @@ SELECT count(s1.f6), s5.f16.f18.f19
 FROM nested_tbl_1
 GROUP BY s5.f16.f18.f19;
 
+EXPLAIN CBO SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19;
+
 SELECT count(s1.f6), s5.f16.f18.f19
 FROM nested_tbl_1
 GROUP BY s5.f16.f18.f19;
@@ -230,3 +234,15 @@ GROUP BY s6['key1'].f20.f21.f22;
 SELECT count(s1.f6), s6['key1'].f20.f21.f22
 FROM nested_tbl_1
 GROUP BY s6['key1'].f20.f21.f22;
+
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1;
+
+EXPLAIN CBO
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1;
+
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1;
+
diff --git a/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out 
b/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out
index dec59379b49..1b2eda86326 100644
--- a/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out
+++ b/ql/src/test/results/clientpositive/llap/input_testxpath4.q.out
@@ -27,7 +27,7 @@ STAGE PLANS:
         TableScan
           alias: src_thrift
           Filter Operator
-            predicate: (mstringstring['key_9'] is not null and 
lintstring.myint is not null and lintstring is not null) (type: boolean)
+            predicate: (lintstring.myint is not null and 
mstringstring['key_9'] is not null and lintstring is not null) (type: boolean)
             Select Operator
               expressions: mstringstring['key_9'] (type: string), 
lintstring.myint (type: array<int>)
               outputColumnNames: _col0, _col1
@@ -87,9 +87,9 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: src_thrift
-          filterExpr: (mstringstring['key_9'] is not null and lintstring.myint 
is not null and lintstring is not null) (type: boolean)
+          filterExpr: (lintstring.myint is not null and mstringstring['key_9'] 
is not null and lintstring is not null) (type: boolean)
           Filter Operator
-            predicate: (mstringstring['key_9'] is not null and 
lintstring.myint is not null and lintstring is not null) (type: boolean)
+            predicate: (lintstring.myint is not null and 
mstringstring['key_9'] is not null and lintstring is not null) (type: boolean)
             Select Operator
               expressions: mstringstring['key_9'] (type: string), 
lintstring.myint (type: array<int>)
               outputColumnNames: _col0, _col1
diff --git 
a/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out 
b/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out
index e0990f51a16..499ba134dcc 100644
--- a/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/nested_column_pruning.q.out
@@ -1966,15 +1966,15 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: nested_tbl_1_n1
-                  Pruned Column Paths: s1.f6, s5.f16
+                  Pruned Column Paths: s5.f16, s1.f6
                   Statistics: Num rows: 1 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: s1 (type: 
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type: 
struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>)
-                    outputColumnNames: s1, s5
+                    expressions: s5.f16.f18.f19 (type: array<int>), s1.f6 
(type: int)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(s1.f6)
-                      keys: s5.f16.f18.f19 (type: array<int>)
+                      aggregations: count(_col1)
+                      keys: _col0 (type: array<int>)
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -2153,12 +2153,12 @@ STAGE PLANS:
                   Pruned Column Paths: s1.f6
                   Statistics: Num rows: 1 Data size: 2012 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: s1 (type: 
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type: 
map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>)
-                    outputColumnNames: s1, s6
+                    expressions: s6['key1'].f20.f21.f22 (type: array<int>), 
s1.f6 (type: int)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 2012 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(s1.f6)
-                      keys: s6['key1'].f20.f21.f22 (type: array<int>)
+                      aggregations: count(_col1)
+                      keys: _col0 (type: array<int>)
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
diff --git 
a/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out 
b/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out
index 9be0258bf26..baa308f9c7e 100644
--- a/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_nested_column_pruning.q.out
@@ -1966,15 +1966,15 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: nested_tbl_1_n0
-                  Pruned Column Paths: s1.f6, s5.f16
+                  Pruned Column Paths: s5.f16, s1.f6
                   Statistics: Num rows: 1 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: s1 (type: 
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type: 
struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>)
-                    outputColumnNames: s1, s5
+                    expressions: s5.f16.f18.f19 (type: array<int>), s1.f6 
(type: int)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(s1.f6)
-                      keys: s5.f16.f18.f19 (type: array<int>)
+                      aggregations: count(_col1)
+                      keys: _col0 (type: array<int>)
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -2153,12 +2153,12 @@ STAGE PLANS:
                   Pruned Column Paths: s1.f6
                   Statistics: Num rows: 1 Data size: 2012 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: s1 (type: 
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type: 
map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>)
-                    outputColumnNames: s1, s6
+                    expressions: s6['key1'].f20.f21.f22 (type: array<int>), 
s1.f6 (type: int)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 2012 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(s1.f6)
-                      keys: s6['key1'].f20.f21.f22 (type: array<int>)
+                      aggregations: count(_col1)
+                      keys: _col0 (type: array<int>)
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
 
b/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
index b1618b08544..4fc2aefbc3c 100644
--- 
a/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out
@@ -2730,15 +2730,15 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: nested_tbl_1
-                  Pruned Column Paths: s1.f6, s5.f16
+                  Pruned Column Paths: s5.f16, s1.f6
                   Statistics: Num rows: 1 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: s1 (type: 
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s5 (type: 
struct<f16:array<struct<f17:string,f18:struct<f19:int>>>>)
-                    outputColumnNames: s1, s5
+                    expressions: s5.f16.f18.f19 (type: array<int>), s1.f6 
(type: int)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 3196 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(s1.f6)
-                      keys: s5.f16.f18.f19 (type: array<int>)
+                      aggregations: count(_col1)
+                      keys: _col0 (type: array<int>)
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -2756,7 +2756,7 @@ STAGE PLANS:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
                 inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                notVectorizedReason: Key expression for GROUPBY operator: 
Vectorizing complex type LIST not supported
+                notVectorizedReason: SELECT operator: Could not vectorize 
expression with a LIST type without an index
                 vectorized: false
         Reducer 2 
             Execution mode: llap
@@ -2790,6 +2790,24 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: EXPLAIN CBO SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT count(s1.f6), s5.f16.f18.f19
+FROM nested_tbl_1
+GROUP BY s5.f16.f18.f19
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(_o__c0=[$1], f19=[$0])
+  HiveAggregate(group=[{0}], agg#0=[count($1)])
+    HiveProject($f0=[COMPONENT_ACCESS($5.f16).f18.f19], $f1=[$1.f6])
+      HiveTableScan(table=[[default, nested_tbl_1]], 
table:alias=[nested_tbl_1])
+
 PREHOOK: query: SELECT count(s1.f6), s5.f16.f18.f19
 FROM nested_tbl_1
 GROUP BY s5.f16.f18.f19
@@ -2985,12 +3003,12 @@ STAGE PLANS:
                   Pruned Column Paths: s1.f6
                   Statistics: Num rows: 1 Data size: 2012 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: s1 (type: 
struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>), s6 (type: 
map<string,struct<f20:array<struct<f21:struct<f22:int>>>>>)
-                    outputColumnNames: s1, s6
+                    expressions: s6['key1'].f20.f21.f22 (type: array<int>), 
s1.f6 (type: int)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 2012 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(s1.f6)
-                      keys: s6['key1'].f20.f21.f22 (type: array<int>)
+                      aggregations: count(_col1)
+                      keys: _col0 (type: array<int>)
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
@@ -3008,7 +3026,7 @@ STAGE PLANS:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
                 inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                notVectorizedReason: Key expression for GROUPBY operator: 
Vectorizing complex type LIST not supported
+                notVectorizedReason: SELECT operator: Could not vectorize 
expression with a LIST type without an index
                 vectorized: false
         Reducer 2 
             Execution mode: llap
@@ -3055,3 +3073,87 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nested_tbl_1
 #### A masked pattern was here ####
 1      [1]
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_tbl_1
+                  Pruned Column Paths: s5.f16
+                  Statistics: Num rows: 1 Data size: 2880 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: s5.f16.f18.f19 (type: array<int>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 2880 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 2880 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: SELECT operator: Could not vectorize 
expression with a LIST type without an index
+                vectorized: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN CBO
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(f19=[COMPONENT_ACCESS($5.f16).f18.f19])
+  HiveTableScan(table=[[default, nested_tbl_1]], table:alias=[nested_tbl_1])
+
+PREHOOK: query: SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s5.f16.f18.f19
+FROM nested_tbl_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@nested_tbl_1
+#### A masked pattern was here ####
+[14,28]

(hive) branch master updated: HIVE-28408: Support ARRAY field access in CBO (Ramesh Kumar, reviewed by Shohei Okumiya, Stamatis Zampetakis)

Reply via email to