This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 3c532c2 HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman) 3c532c2 is described below commit 3c532c2cd2603edc60c721282d45390e910a0358 Author: Alessandro Solimando <alessandro.solima...@gmail.com> AuthorDate: Tue Feb 8 17:09:19 2022 +0100 HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman) Closes #3011 --- .../apache/hadoop/hive/ql/exec/ExplainTask.java | 39 +++++-- .../hadoop/hive/ql/parse/CalcitePlanner.java | 24 ++++- .../queries/clientpositive/excluded_rule_explain.q | 11 ++ .../llap/excluded_rule_explain.q.out | 112 +++++++++++++++++++++ .../llap/rule_exclusion_config.q.out | 8 ++ 5 files changed, 184 insertions(+), 10 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index c59f44f..59f9044 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -101,8 +101,12 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(ExplainTask.class.getName()); public static final String STAGE_DEPENDENCIES = "STAGE DEPENDENCIES"; + private static final String EXCLUDED_RULES_PREFIX = "Excluded rules: "; private static final long serialVersionUID = 1L; public static final String EXPL_COLUMN_NAME = "Explain"; + private static final String CBO_INFO_JSON_LABEL = "cboInfo"; + private static final String CBO_PLAN_JSON_LABEL = "CBOPlan"; + private static final String CBO_PLAN_TEXT_LABEL = "CBO PLAN:"; private final Set<Operator<?>> visitedOps = new HashSet<Operator<?>>(); private boolean isLogical = false; @@ -152,15 +156,22 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { return outJSONObject; } - public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) throws Exception { + public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) { JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>()); boolean jsonOutput = work.isFormatted(); String cboPlan = work.getCboPlan(); if (cboPlan != null) { + String ruleExclusionRegex = getRuleExcludedRegex(); if (jsonOutput) { - outJSONObject.put("CBOPlan", cboPlan); + outJSONObject.put(CBO_PLAN_JSON_LABEL, cboPlan); + if (!ruleExclusionRegex.isEmpty()) { + outJSONObject.put(CBO_INFO_JSON_LABEL, EXCLUDED_RULES_PREFIX + ruleExclusionRegex); + } } else { - out.println("CBO PLAN:"); + if (!ruleExclusionRegex.isEmpty()) { + out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex + "\n"); + } + out.println(CBO_PLAN_TEXT_LABEL); out.println(cboPlan); } } @@ -272,6 +283,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo, String cboPlan, String optimizedSQL, String stageIdRearrange) throws Exception { + String ruleExclusionRegex = getRuleExcludedRegex(); + // If the user asked for a formatted output, dump the json output // in the output stream JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>()); @@ -282,9 +295,15 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { if (cboPlan != null) { if (jsonOutput) { - outJSONObject.put("CBOPlan", cboPlan); + outJSONObject.put(CBO_PLAN_JSON_LABEL, cboPlan); + if (!ruleExclusionRegex.isEmpty()) { + outJSONObject.put(CBO_INFO_JSON_LABEL, EXCLUDED_RULES_PREFIX + ruleExclusionRegex); + } } else { - out.print("CBO PLAN:"); + if (!ruleExclusionRegex.isEmpty()) { + out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex); + } + out.print(CBO_PLAN_TEXT_LABEL); out.println(cboPlan); } } @@ -327,6 +346,10 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { } if (!suppressOthersForVectorization) { + if (!jsonOutput && !ruleExclusionRegex.isEmpty()) { + out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex + "\n"); + } + JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered); if (out != null) { @@ -335,7 +358,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { if (jsonOutput) { if (cboInfo != null) { - outJSONObject.put("cboInfo", cboInfo); + outJSONObject.put(CBO_INFO_JSON_LABEL, cboInfo); } outJSONObject.put(STAGE_DEPENDENCIES, jsonDependencies); } @@ -952,6 +975,10 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { return invokeFlag; } + private String getRuleExcludedRegex() { + return conf == null ? "" : conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, ""); + } + @VisibleForTesting JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 24c1671..bc55d0e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -355,6 +355,7 @@ import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMateri public class CalcitePlanner extends SemanticAnalyzer { + private static final String EXCLUDED_RULES_PREFIX = "Excluded rules: "; /** * {@link org.antlr.runtime.TokenRewriteStream} offers the opportunity of multiple rewrites of the same * input text (in our case the sql query text). These rewrites are called programs and identified by a string. @@ -575,7 +576,8 @@ public class CalcitePlanner extends SemanticAnalyzer { getQB().getParseInfo().setHintList(oldHints); } LOG.info("CBO Succeeded; optimized logical plan."); - this.ctx.setCboInfo("Plan optimized by CBO."); + + this.ctx.setCboInfo(getOptimizedByCboInfo()); this.ctx.setCboSucceeded(true); } else { // 1. Convert Plan to AST @@ -628,7 +630,8 @@ public class CalcitePlanner extends SemanticAnalyzer { disableJoinMerge = defaultJoinMerge; sinkOp = genPlan(getQB()); LOG.info("CBO Succeeded; optimized logical plan."); - this.ctx.setCboInfo("Plan optimized by CBO."); + + this.ctx.setCboInfo(getOptimizedByCboInfo()); this.ctx.setCboSucceeded(true); if (this.ctx.isExplainPlan()) { // Enrich explain with information derived from CBO @@ -707,6 +710,15 @@ public class CalcitePlanner extends SemanticAnalyzer { return sinkOp; } + private String getOptimizedByCboInfo() { + String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, ""); + String cboInfo = "Plan optimized by CBO."; + if (!ruleExclusionRegex.isEmpty()) { + cboInfo = cboInfo + (" " + EXCLUDED_RULES_PREFIX + ruleExclusionRegex); + } + return cboInfo; + } + private ASTNode handleCreateViewDDL(ASTNode ast) throws SemanticException { saveViewDefinition(); String originalText = createVwDesc.getViewOriginalText(); @@ -1957,8 +1969,8 @@ public class CalcitePlanner extends SemanticAnalyzer { final RelOptCluster optCluster = basePlan.getCluster(); final PerfLogger perfLogger = SessionState.getPerfLogger(); - final boolean useMaterializedViewsRegistry = !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname) - .equals("DUMMY"); + final boolean useMaterializedViewsRegistry = + !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname).equals("DUMMY"); final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, ""); final RelNode calcitePreMVRewritingPlan = basePlan; final Set<TableName> tablesUsedQuery = getTablesUsed(basePlan); @@ -2026,6 +2038,8 @@ public class CalcitePlanner extends SemanticAnalyzer { // Optimize plan if (!ruleExclusionRegex.isEmpty()) { + LOG.info("The CBO rules matching the following regex are excluded from planning: {}", + ruleExclusionRegex); planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex)); } planner.setRoot(basePlan); @@ -2451,6 +2465,8 @@ public class CalcitePlanner extends SemanticAnalyzer { } if (!ruleExclusionRegex.isEmpty()) { + LOG.info("The CBO rules matching the following regex are excluded from planning: {}", + ruleExclusionRegex); planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex)); } planner.setRoot(basePlan); diff --git a/ql/src/test/queries/clientpositive/excluded_rule_explain.q b/ql/src/test/queries/clientpositive/excluded_rule_explain.q new file mode 100644 index 0000000..245a735 --- /dev/null +++ b/ql/src/test/queries/clientpositive/excluded_rule_explain.q @@ -0,0 +1,11 @@ +EXPLAIN CBO SELECT 1; +EXPLAIN FORMATTED CBO SELECT 1; +EXPLAIN SELECT 1; +EXPLAIN FORMATTED SELECT 1; + +set hive.cbo.rule.exclusion.regex=HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule; + +EXPLAIN CBO SELECT 1; +EXPLAIN FORMATTED CBO SELECT 1; +EXPLAIN SELECT 1; +EXPLAIN FORMATTED SELECT 1; diff --git a/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out b/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out new file mode 100644 index 0000000..0ada0c3 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: EXPLAIN CBO SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +CBO PLAN: +HiveProject(_o__c0=[1]) + HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table]) + +PREHOOK: query: EXPLAIN FORMATTED CBO SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN FORMATTED CBO SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 129.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET [...] +PREHOOK: query: EXPLAIN SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN FORMATTED SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN FORMATTED SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 0.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__ [...] +PREHOOK: query: EXPLAIN CBO SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +Excluded rules: HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule + +CBO PLAN: +HiveProject(_o__c0=[1]) + HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table]) + +PREHOOK: query: EXPLAIN FORMATTED CBO SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN FORMATTED CBO SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 129.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET [...] +PREHOOK: query: EXPLAIN SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +Excluded rules: HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: EXPLAIN FORMATTED SELECT 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN FORMATTED SELECT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 0.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__ [...] diff --git a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out index a656edf..298c4a7 100644 --- a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out +++ b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out @@ -47,6 +47,8 @@ WHERE src1.key > 10 and src1.key < 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Excluded rules: HiveJoinPushTransitivePredicatesRule + CBO PLAN: HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -79,6 +81,8 @@ WHERE src1.key > 10 and src1.key < 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Excluded rules: HiveJoinPushTransitivePredicatesRule|HiveJoinAddNotNullRule + CBO PLAN: HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -109,6 +113,8 @@ WHERE src1.key > 10 and src1.key < 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Excluded rules: HiveJoin.*Rule + CBO PLAN: HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -139,6 +145,8 @@ WHERE src1.key > 10 and src1.key < 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Excluded rules: .* + CBO PLAN: HiveProject(key=[$0], value=[$1], key1=[$6], value1=[$7], key2=[$12], value2=[$13]) HiveFilter(condition=[AND(>(CAST($0):DOUBLE, CAST(10):DOUBLE), <(CAST($0):DOUBLE, CAST(20):DOUBLE))])