This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 090e3a6c7f70339c3a3549856de3bfcc05ea1936 Author: Bowen Song <bowen.s...@kyligence.io> AuthorDate: Sun Nov 20 15:23:41 2022 +0800 KYLIN-5412 add condition to calcite project merge rule to fix OOM --- pom.xml | 2 +- .../org/apache/kylin/common/KylinConfigBase.java | 12 +++++-- .../kylin/query/engine/SqlToRelNodeTest.java | 37 ++++++++++++++++++++++ .../apache/kylin/query/engine/SqlToRelNodeTest.xml | 26 +++++++++++++++ .../kylin/query/engine/SumExprPlannerTest.xml | 36 +++++++++++---------- .../kap/query/optrule/KapProjectMergeRule.java | 14 ++++++-- 6 files changed, 106 insertions(+), 21 deletions(-) diff --git a/pom.xml b/pom.xml index 5b85bc60bf..f9304728e8 100644 --- a/pom.xml +++ b/pom.xml @@ -73,7 +73,7 @@ <scala-retry>0.3.0</scala-retry> <!-- Calcite Version --> - <calcite.version>1.116.0-kylin-4.x-r023</calcite.version> + <calcite.version>1.116.0-kylin-4.x-r024</calcite.version> <avatica.version>4.x_1.10-r01</avatica.version> <!-- Hadoop Common deps, keep compatible with hadoop2.version --> diff --git a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 95fa963d5e..4405eede60 100644 --- a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -3712,11 +3712,19 @@ public abstract class KylinConfigBase implements Serializable { return Integer.parseInt(getOptional("kylin.second-storage.wait-lock-timeout", "180")); } + public boolean getDDLEnabled() { + return Boolean.parseBoolean(getOptional("kylin.source.ddl.enabled", FALSE)); + } + public boolean isBuildSegmentOverlapEnabled() { return Boolean.parseBoolean(getOptional("kylin.build.segment-overlap-enabled", FALSE)); } - public boolean getDDLEnabled() { - return Boolean.parseBoolean(getOptional("kylin.source.ddl.enabled", FALSE)); + public boolean isProjectMergeWithBloatEnabled() { + return Boolean.parseBoolean(getOptional("kylin.query.kap-project-merge-with-bloat-enabled", "true")); + } + + public int getKapProjectMergeRuleBloatThreshold() { + return Integer.parseInt(getOptional("kylin.query.kap-project-merge-bloat-threshold", "0")); } } diff --git a/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java b/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java index 09a6ff74ad..64f5ea16d2 100644 --- a/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java +++ b/src/kylin-it/src/test/java/org/apache/kylin/query/engine/SqlToRelNodeTest.java @@ -84,6 +84,43 @@ public class SqlToRelNodeTest extends CalciteRuleTestBase { checkSQLOptimize(PROJECT, sql.getSecond(), "query_sql_in_query02"); } + @Test + public void testProjectMergeWithBloat() throws Exception { + String sql = "select q.x + q.x from( select (p.v + p.v) as x from (select (case when TRANS_ID > 60 then 1 else 0 end) v from test_kylin_fact) p)q"; + + try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig.setAndUnsetThreadLocalConfig(config)) { + config.setProperty("kylin.query.kap-project-merge-with-bloat-enabled", "false"); + QueryExec exec1 = new QueryExec(PROJECT, config); + RelRoot relRoot = exec1.sqlToRelRoot(sql); + RelNode rel = exec1.optimize(relRoot).rel; + String realPlan = NL + RelOptUtil.toString(rel); + diff.assertEquals("bloat_merge_sql.bloat_disabled", "${bloat_merge_sql.bloat_disabled}", realPlan); + + config.setProperty("kylin.query.kap-project-merge-with-bloat-enabled", "true"); + QueryExec exec2 = new QueryExec(PROJECT, config); + relRoot = exec2.sqlToRelRoot(sql); + rel = exec2.optimize(relRoot).rel; + realPlan = NL + RelOptUtil.toString(rel); + diff.assertEquals("bloat_merge_sql.bloat_enabled", "${bloat_merge_sql.bloat_enabled}", realPlan); + + config.setProperty("kylin.query.kap-project-merge-with-bloat-enabled", "true"); + config.setProperty("kylin.query.kap-project-merge-bloat-threshold", "5"); + QueryExec exec3 = new QueryExec(PROJECT, config); + relRoot = exec3.sqlToRelRoot(sql); + rel = exec3.optimize(relRoot).rel; + realPlan = NL + RelOptUtil.toString(rel); + diff.assertEquals("bloat_merge_sql.bloat_enabled_bloat_5", "${bloat_merge_sql.bloat_enabled_bloat_5}", realPlan); + + config.setProperty("kylin.query.kap-project-merge-bloat-threshold", "100"); + QueryExec exec4 = new QueryExec(PROJECT, config); + relRoot = exec4.sqlToRelRoot(sql); + rel = exec4.optimize(relRoot).rel; + realPlan = NL + RelOptUtil.toString(rel); + diff.assertEquals("bloat_merge_sql.bloat_disabled", "${bloat_merge_sql.bloat_disabled}", realPlan); + } + + } + /** * Visitor that checks that every {@link RelNode} in a tree is valid. * diff --git a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml index 3b02bd2722..dca32e385e 100644 --- a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml +++ b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SqlToRelNodeTest.xml @@ -144,6 +144,32 @@ KapOLAPToEnumerableConverter KapProjectRel(LSTG_FORMAT_NAME=[$3], ctx=[]) KapFilterRel(condition=[OR(=($3, null), =($3, 'FP-GTC'))], ctx=[]) KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) +]]> + </Resource> + </TestCase> + <TestCase name="testProjectMergeWithBloat"> + <Resource name="bloat_merge_sql.bloat_disabled"> + <![CDATA[ +KapOLAPToEnumerableConverter + KapProjectRel(EXPR$0=[+(+(CASE(>($0, 60), 1, 0), CASE(>($0, 60), 1, 0)), +(CASE(>($0, 60), 1, 0), CASE(>($0, 60), 1, 0)))], ctx=[]) + KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) +]]> + </Resource> + <Resource name="bloat_merge_sql.bloat_enabled"> + <![CDATA[ +KapOLAPToEnumerableConverter + KapProjectRel(EXPR$0=[+($0, $0)], ctx=[]) + KapProjectRel(X=[+($0, $0)], ctx=[]) + KapProjectRel(V=[CASE(>($0, 60), 1, 0)], ctx=[]) + KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) +]]> + </Resource> + <Resource name="bloat_merge_sql.bloat_enabled_bloat_5"> + <![CDATA[ +KapOLAPToEnumerableConverter + KapProjectRel(EXPR$0=[+(+($0, $0), +($0, $0))], ctx=[]) + KapProjectRel(V=[CASE(>($0, 60), 1, 0)], ctx=[]) + KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) ]]> </Resource> </TestCase> diff --git a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml index 1c5a41e368..e7b560f9a5 100644 --- a/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml +++ b/src/kylin-it/src/test/resources/org/apache/kylin/query/engine/SumExprPlannerTest.xml @@ -361,16 +361,18 @@ KapOLAPToEnumerableConverter KapUnionRel(all=[true], ctx=[], all=[true]) KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[]) KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[]) - KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], $f3=[*(/($1, $2), /($1, $2))], PRI=[/($1, $2)], ctx=[]) - KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[]) - KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) - KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) + KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[]) + KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], PRI=[/($1, $2)], ctx=[]) + KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[]) + KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) + KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[]) KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[]) - KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], $f3=[*(/($2, $3), /($2, $3))], PRI=[/($2, $3)], ctx=[]) - KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[]) - KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) - KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) + KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[]) + KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], PRI=[/($2, $3)], ctx=[]) + KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[]) + KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) + KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) ]]> </Resource> <Resource name="query11.planAfter"> @@ -379,16 +381,18 @@ KapOLAPToEnumerableConverter KapUnionRel(all=[true], ctx=[], all=[true]) KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[]) KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[]) - KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], $f3=[*(/($1, $2), /($1, $2))], PRI=[/($1, $2)], ctx=[]) - KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[]) - KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) - KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) + KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[]) + KapProjectRel($f0=[*($0, 2)], $f1=[*(/($1, $2), 2)], PRI=[/($1, $2)], ctx=[]) + KapAggregateRel(group-set=[[]], groups=[null], A1=[SUM($0)], A2=[SUM($1)], agg#2=[COUNT($1)], ctx=[]) + KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) + KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) KapProjectRel(EXPR=[$0], PRI=[CAST(/($1, $2)):BIGINT], PPP=[CAST(/(-($3, /(*($4, $4), $5)), $5)):BIGINT], ctx=[]) KapAggregateRel(group-set=[[]], groups=[null], EXPR=[SUM($0)], agg#1=[SUM($1)], agg#2=[COUNT($1)], agg#3=[SUM($2)], agg#4=[SUM($3)], agg#5=[COUNT($3)], ctx=[]) - KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], $f3=[*(/($2, $3), /($2, $3))], PRI=[/($2, $3)], ctx=[]) - KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[]) - KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) - KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) + KapProjectRel($f0=[$0], $f1=[$1], $f3=[*($2, $2)], PRI=[$2], ctx=[]) + KapProjectRel($f0=[*($1, 2)], $f1=[*(/($2, $3), 2)], PRI=[/($2, $3)], ctx=[]) + KapAggregateRel(group-set=[[]], groups=[null], MIN_PRI=[MIN($0)], A1=[SUM($0)], A2=[SUM($1)], agg#3=[COUNT($1)], ctx=[]) + KapProjectRel(PRICE=[$8], ITEM_COUNT=[$9], ctx=[]) + KapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) ]]> </Resource> <Resource name="query12.planBefore"> diff --git a/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java b/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java index f163ad7caa..8aefc1d645 100644 --- a/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java +++ b/src/query/src/main/java/io/kyligence/kap/query/optrule/KapProjectMergeRule.java @@ -36,6 +36,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.Permutation; +import org.apache.kylin.common.KylinConfig; import com.google.common.collect.Sets; @@ -54,7 +55,6 @@ import com.google.common.collect.Sets; * */ public class KapProjectMergeRule extends RelOptRule { - public static final KapProjectMergeRule INSTANCE = new KapProjectMergeRule(true, RelFactories.LOGICAL_BUILDER); //~ Instance fields -------------------------------------------------------- @@ -120,7 +120,17 @@ public class KapProjectMergeRule extends RelOptRule { return; } - final List<RexNode> pushedProjects = RelOptUtil.pushPastProject(topProject.getProjects(), bottomProject); + final List<RexNode> pushedProjects; + if (KylinConfig.getInstanceFromEnv().isProjectMergeWithBloatEnabled()) { + pushedProjects = RelOptUtil.pushPastProjectUnlessBloat(topProject.getProjects(), + bottomProject, KylinConfig.getInstanceFromEnv().getKapProjectMergeRuleBloatThreshold()); + if (pushedProjects == null) { + // Merged projects are significantly more complex. Do not merge. + return; + } + } else { + pushedProjects = RelOptUtil.pushPastProject(topProject.getProjects(), bottomProject); + } final List<RexNode> newProjects = simplify(pushedProjects); final RelNode input = bottomProject.getInput(); if (RexUtil.isIdentity(newProjects, input.getRowType())