This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4f22ecde118 HIVE-28720: EXPLAIN CBO is empty when hive.cbo.returnpath.hiveop is true (Stamatis Zampetakis reviewed by Krisztian Kasa)
4f22ecde118 is described below
commit 4f22ecde118a35bb5217cb09dba4bb094494f249
Author: Stamatis Zampetakis <[email protected]>
AuthorDate: Wed Jan 29 08:39:12 2025 +0100
HIVE-28720: EXPLAIN CBO is empty when hive.cbo.returnpath.hiveop is true (Stamatis Zampetakis reviewed by Krisztian Kasa)
Closes apache/hive#5619
---
.../hadoop/hive/ql/parse/CalcitePlanner.java | 74 +++++++++++-----------
.../test/queries/clientpositive/cbo_rp_explain.q | 13 ++++
.../clientpositive/llap/cbo_rp_explain.q.out | 61 ++++++++++++++++++
.../llap/groupby_ppr_multi_distinct.q.out | 4 ++
4 files changed, 114 insertions(+), 38 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 143f85769f3..c662417b371 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -590,15 +590,16 @@ Operator genOPTree(ASTNode ast, PlannerContext
plannerCtx) throws SemanticExcept
if (cboCtx.type == PreCboCtx.Type.VIEW && !materializedView) {
throw new SemanticException("Create view is not supported in cbo
return path.");
}
+ newPlan =
+ PlanModifierForReturnPath.convertOpTree(newPlan, resultSchema,
this.getQB().getTableDesc() != null);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Plan after return path modifier:\n" +
RelOptUtil.toString(newPlan));
+ }
sinkOp = getOptimizedHiveOPDag(newPlan);
if (oldHints.size() > 0) {
LOG.debug("Propagating hints to QB: " + oldHints);
getQB().getParseInfo().setHintList(oldHints);
}
- LOG.info("CBO Succeeded; optimized logical plan.");
-
- this.ctx.setCboInfo(getOptimizedByCboInfo());
- this.ctx.setCboSucceeded(true);
} else {
// 1. Convert Plan to AST
ASTNode newAST = getOptimizedAST(newPlan);
@@ -649,39 +650,40 @@ Operator genOPTree(ASTNode ast, PlannerContext
plannerCtx) throws SemanticExcept
disableJoinMerge = defaultJoinMerge;
sinkOp = genPlan(getQB());
- LOG.info("CBO Succeeded; optimized logical plan.");
-
- this.ctx.setCboInfo(getOptimizedByCboInfo());
- this.ctx.setCboSucceeded(true);
- if (this.ctx.isExplainPlan()) {
- // Enrich explain with information derived from CBO
- ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
- if (explainConfig.isCbo()) {
- if (!explainConfig.isCboJoinCost()) {
- // Include cost as provided by Calcite
- newPlan.getCluster().invalidateMetadataQuery();
-
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
- }
- if (explainConfig.isFormatted()) {
-
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
- } else if (explainConfig.isCboCost() ||
explainConfig.isCboJoinCost()) {
- this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan,
SqlExplainLevel.ALL_ATTRIBUTES));
- } else {
- // Do not include join cost
- this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan));
- }
- } else if (explainConfig.isFormatted()) {
- this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
- this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
- } else if (explainConfig.isExtended()) {
- this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
- }
- }
if (LOG.isTraceEnabled()) {
- LOG.trace(getOptimizedSql(newPlan));
LOG.trace(newAST.dump());
}
}
+ LOG.info("CBO Succeeded; optimized logical plan.");
+ this.ctx.setCboInfo(getOptimizedByCboInfo());
+ this.ctx.setCboSucceeded(true);
+ if (this.ctx.isExplainPlan()) {
+ // Enrich explain with information derived from CBO
+ ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
+ if (explainConfig.isCbo()) {
+ if (!explainConfig.isCboJoinCost()) {
+ // Include cost as provided by Calcite
+ newPlan.getCluster().invalidateMetadataQuery();
+
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
+ }
+ if (explainConfig.isFormatted()) {
+ this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
+ } else if (explainConfig.isCboCost() ||
explainConfig.isCboJoinCost()) {
+ this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan,
SqlExplainLevel.ALL_ATTRIBUTES));
+ } else {
+ // Do not include join cost
+ this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan));
+ }
+ } else if (explainConfig.isFormatted()) {
+ this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
+ this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
+ } else if (explainConfig.isExtended()) {
+ this.ctx.setOptimizedSql(getOptimizedSql(newPlan));
+ }
+ }
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(getOptimizedSql(newPlan));
+ }
perfLogger.perfLogEnd(this.getClass().getName(),
PerfLogger.GENERATE_OPERATOR_TREE);
} catch (Exception e) {
LOG.error("CBO failed, skipping CBO. ", e);
@@ -1415,12 +1417,8 @@ ASTNode getOptimizedAST(RelNode optimizedOptiqPlan)
throws SemanticException {
* @throws SemanticException
*/
Operator getOptimizedHiveOPDag(RelNode optimizedOptiqPlan) throws
SemanticException {
- RelNode modifiedOptimizedOptiqPlan =
PlanModifierForReturnPath.convertOpTree(
- optimizedOptiqPlan, resultSchema, this.getQB().getTableDesc() != null);
-
- LOG.debug("Translating the following plan:\n" +
RelOptUtil.toString(modifiedOptimizedOptiqPlan));
Operator<?> hiveRoot = new HiveOpConverter(this, conf, unparseTranslator,
topOps)
- .convert(modifiedOptimizedOptiqPlan);
+ .convert(optimizedOptiqPlan);
RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB());
opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR));
String dest = getQB().getParseInfo().getClauseNames().iterator().next();
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_explain.q b/ql/src/test/queries/clientpositive/cbo_rp_explain.q
new file mode 100644
index 00000000000..4d2e8a8cdec
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_explain.q
@@ -0,0 +1,13 @@
+set hive.cbo.returnpath.hiveop=true;
+
+CREATE TABLE author (id INT, fname STRING, lname STRING, birth DATE);
+CREATE TABLE book (id INT, title STRING, author INT);
+
+EXPLAIN CBO
+SELECT lname, MAX(birth) FROM author GROUP BY lname;
+
+EXPLAIN CBO
+SELECT author.lname, book.title
+FROM author
+INNER JOIN book ON author.id=book.author
+WHERE author.fname = 'Victor';
diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_explain.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_explain.q.out
new file mode 100644
index 00000000000..f5964fecd36
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_rp_explain.q.out
@@ -0,0 +1,61 @@
+PREHOOK: query: CREATE TABLE author (id INT, fname STRING, lname STRING, birth
DATE)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@author
+POSTHOOK: query: CREATE TABLE author (id INT, fname STRING, lname STRING,
birth DATE)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@author
+PREHOOK: query: CREATE TABLE book (id INT, title STRING, author INT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@book
+POSTHOOK: query: CREATE TABLE book (id INT, title STRING, author INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@book
+PREHOOK: query: EXPLAIN CBO
+SELECT lname, MAX(birth) FROM author GROUP BY lname
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT lname, MAX(birth) FROM author GROUP BY lname
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{0}], agg#0=[max($1)])
+ HiveProject(lname=[$2], birth=[$3])
+ HiveTableScan(table=[[default, author]], qbid:alias=[author])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT author.lname, book.title
+FROM author
+INNER JOIN book ON author.id=book.author
+WHERE author.fname = 'Victor'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@author
+PREHOOK: Input: default@book
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT author.lname, book.title
+FROM author
+INNER JOIN book ON author.id=book.author
+WHERE author.fname = 'Victor'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@author
+POSTHOOK: Input: default@book
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(lname=[$2], title=[$3])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none],
cost=[not available])
+ HiveSortExchange(distribution=[hash[0]], collation=[[0]])
+ HiveProject(id=[$0], fname=[CAST(_UTF-16LE'Victor':VARCHAR(2147483647)
CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"],
lname=[$2])
+ HiveFilter(condition=[AND(=($1, _UTF-16LE'Victor'), IS NOT NULL($0))])
+ HiveTableScan(table=[[default, author]], qbid:alias=[author])
+ HiveSortExchange(distribution=[hash[1]], collation=[[1]])
+ HiveProject(title=[$1], author=[$2])
+ HiveFilter(condition=[IS NOT NULL($2)])
+ HiveTableScan(table=[[default, book]], qbid:alias=[book])
+
diff --git a/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out
index 19954771746..92b854ec157 100644
--- a/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby_ppr_multi_distinct.q.out
@@ -340,6 +340,10 @@ POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
POSTHOOK: Output: default@dest1
+OPTIMIZED SQL: SELECT SUBSTR(`key`, 1, 1) AS `_o__c0`, COUNT(DISTINCT
SUBSTR(`value`, 5)) AS `_o__c1`, SUBSTR(`key`, 1, 1) || SUM(SUBSTR(`value`, 5))
AS `_o__c2`, SUM(DISTINCT SUBSTR(`value`, 5)) AS `_o__c3`, COUNT(DISTINCT
`value`) AS `_o__c4`
+FROM `default`.`srcpart`
+WHERE `ds` = '2008-04-08'
+GROUP BY SUBSTR(`key`, 1, 1)
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1