This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5a0ad09856 [fix](nereids) SubqueryToApply may lost conjunct (#22262)
5a0ad09856 is described below
commit 5a0ad098560e0ab0819249bc81a281110c1abbe4
Author: starocean999 <[email protected]>
AuthorDate: Fri Jul 28 15:08:56 2023 +0800
[fix](nereids) SubqueryToApply may lost conjunct (#22262)
consider sql:
```
SELECT *
FROM sub_query_correlated_subquery1 t1
WHERE coalesce(bitand(
cast(
(SELECT sum(k1)
FROM sub_query_correlated_subquery3 ) AS int),
cast(t1.k1 AS int)),
coalesce(t1.k1, t1.k2)) is NULL
ORDER BY t1.k1, t1.k2;
```
is Null conjunct is lost in SubqueryToApply rule. This pr fix it
---
.../nereids/rules/analysis/SubqueryToApply.java | 140 +++++----------------
.../nereids_syntax_p0/sub_query_correlated.out | 17 +++
.../nereids_syntax_p0/sub_query_correlated.groovy | 23 ++--
3 files changed, 66 insertions(+), 114 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
index fb39794c23..6b89d02782 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java
@@ -24,15 +24,11 @@ import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.BinaryOperator;
import org.apache.doris.nereids.trees.expressions.CaseWhen;
-import org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
-import org.apache.doris.nereids.trees.expressions.CompoundPredicate;
import org.apache.doris.nereids.trees.expressions.Exists;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.InSubquery;
-import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
-import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.ScalarSubquery;
import org.apache.doris.nereids.trees.expressions.SlotReference;
@@ -46,6 +42,7 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
@@ -196,7 +193,7 @@ public class SubqueryToApply implements AnalysisRuleFactory
{
subquery.getCorrelateSlots(),
subquery, Optional.empty(),
subqueryToMarkJoinSlot.get(subquery),
- mergeScalarSubConjectAndFilterConject(
+ mergeScalarSubConjunctAndFilterConjunct(
subquery, subqueryCorrespondingConject,
conjunct, needAddSubOutputToProjects, singleSubquery),
isProject,
childPlan, subquery.getQueryPlan());
@@ -245,7 +242,7 @@ public class SubqueryToApply implements AnalysisRuleFactory
{
* LogicalJoin(otherConjunct[k2 = c2]) ---> inSub
* LogicalJoin(otherConjunct[k1 > sum(c1)]) ---> scalarSub
*/
- private Optional<Expression> mergeScalarSubConjectAndFilterConject(
+ private Optional<Expression> mergeScalarSubConjunctAndFilterConjunct(
SubqueryExpr subquery,
Map<SubqueryExpr, Expression> subqueryCorrespondingConject,
Optional<Expression> conjunct,
@@ -292,8 +289,20 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
.collect(ImmutableSet.toImmutableSet());
}
- public Expression replace(Expression expressions, SubqueryContext
subqueryContext) {
- return expressions.accept(this, subqueryContext);
+ public Expression replace(Expression expression, SubqueryContext
subqueryContext) {
+ Expression replacedExpr = doReplace(expression, subqueryContext);
+ if (subqueryContext.onlySingleSubquery() && !isMarkJoin) {
+ // if there is only one subquery and it's not a mark join,
+ // we can merge the filter with the join conjunct to eliminate
the filter node
+ // to do that, we need update the subquery's corresponding
conjunct use replacedExpr
+ // see mergeScalarSubConjunctAndFilterConjunct() for more info
+
subqueryContext.updateSubqueryCorrespondingConjunct(replacedExpr);
+ }
+ return replacedExpr;
+ }
+
+ public Expression doReplace(Expression expression, SubqueryContext
subqueryContext) {
+ return expression.accept(this, subqueryContext);
}
@Override
@@ -342,52 +351,16 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
return isMarkJoin ? markJoinSlotReference :
scalar.getSubqueryOutput();
}
- @Override
- public Expression visitNot(Not not, SubqueryContext context) {
- // Need to re-update scalarSubQuery unequal conditions into
subqueryCorrespondingConject
- if (not.child() instanceof BinaryOperator
- && (((BinaryOperator)
not.child()).left().containsType(ScalarSubquery.class)
- || ((BinaryOperator)
not.child()).right().containsType(ScalarSubquery.class))) {
- Expression newChild = replace(not.child(), context);
- ScalarSubquery subquery =
collectScalarSubqueryForBinaryOperator((BinaryOperator) not.child());
- context.updateSubqueryCorrespondingConjunctInNot(subquery);
- return
context.getSubqueryToMarkJoinSlotValue(subquery).isPresent() ? newChild : new
Not(newChild);
- }
-
- return visit(not, context);
- }
-
- @Override
- public Expression visitIsNull(IsNull isNull, SubqueryContext context) {
- // Need to re-update scalarSubQuery unequal conditions into
subqueryCorrespondingConject
- if (isNull.child() instanceof BinaryOperator
- && (((BinaryOperator)
isNull.child()).left().containsType(ScalarSubquery.class)
- || ((BinaryOperator)
isNull.child()).right().containsType(ScalarSubquery.class))) {
- Expression newChild = replace(isNull.child(), context);
- ScalarSubquery subquery =
collectScalarSubqueryForBinaryOperator((BinaryOperator) isNull.child());
- context.updateSubqueryCorrespondingConjunctIsNull(subquery);
- return
context.getSubqueryToMarkJoinSlotValue(subquery).isPresent() ? newChild : new
IsNull(newChild);
- }
-
- return visit(isNull, context);
- }
-
@Override
public Expression visitBinaryOperator(BinaryOperator binaryOperator,
SubqueryContext context) {
- boolean atLeastOneChildContainsScalarSubquery =
- binaryOperator.left().containsType(ScalarSubquery.class)
- ||
binaryOperator.right().containsType(ScalarSubquery.class);
- boolean currentMarkJoin =
((binaryOperator.left().anyMatch(SubqueryExpr.class::isInstance)
- ||
binaryOperator.right().anyMatch(SubqueryExpr.class::isInstance))
- && (binaryOperator instanceof Or)) ||
isMarkJoin;
- isMarkJoin = currentMarkJoin;
- Expression left = replace(binaryOperator.left(), context);
- isMarkJoin = currentMarkJoin;
- Expression right = replace(binaryOperator.right(), context);
-
- if (atLeastOneChildContainsScalarSubquery && !(binaryOperator
instanceof CompoundPredicate)) {
- return context.replaceBinaryOperator(binaryOperator, left,
right, isProject);
- }
+ // update isMarkJoin flag
+ isMarkJoin =
+ isMarkJoin ||
((binaryOperator.left().anyMatch(SubqueryExpr.class::isInstance)
+ ||
binaryOperator.right().anyMatch(SubqueryExpr.class::isInstance))
+ && (binaryOperator instanceof Or));
+
+ Expression left = doReplace(binaryOperator.left(), context);
+ Expression right = doReplace(binaryOperator.right(), context);
return binaryOperator.withChildren(left, right);
}
@@ -399,11 +372,11 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
* For inSubquery and exists: it will be directly replaced by
markSlotReference
* e.g.
* logicalFilter(predicate=exists) ---> logicalFilter(predicate=$c$1)
- * For scalarSubquery: will replace the connected ComparisonPredicate with
markSlotReference
+ * For scalarSubquery: it will be replaced by markSlotReference too
* e.g.
- * logicalFilter(predicate=k1 > scalarSubquery) --->
logicalFilter(predicate=$c$1)
+ * logicalFilter(predicate=k1 > scalarSubquery) --->
logicalFilter(predicate=k1 > $c$1)
*
- * subqueryCorrespondingConject: Record the conject corresponding to the
subquery.
+ * subqueryCorrespondingConjunct: Record the conject corresponding to the
subquery.
* rule:
*
*
@@ -427,10 +400,6 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
return subqueryCorrespondingConjunct;
}
- private Optional<MarkJoinSlotReference>
getSubqueryToMarkJoinSlotValue(SubqueryExpr subqueryExpr) {
- return subqueryToMarkJoinSlot.get(subqueryExpr);
- }
-
private void setSubqueryToMarkJoinSlot(SubqueryExpr subquery,
Optional<MarkJoinSlotReference>
markJoinSlotReference) {
subqueryToMarkJoinSlot.put(subquery, markJoinSlotReference);
@@ -445,56 +414,13 @@ public class SubqueryToApply implements
AnalysisRuleFactory {
return subqueryToMarkJoinSlot.size() == 1;
}
- private void updateSubqueryCorrespondingConjunctInNot(SubqueryExpr
subquery) {
- if (subqueryCorrespondingConjunct.containsKey(subquery)) {
- subqueryCorrespondingConjunct.replace(subquery,
- new Not(subqueryCorrespondingConjunct.get(subquery)));
- }
- }
-
- private void updateSubqueryCorrespondingConjunctIsNull(SubqueryExpr
subquery) {
- if (subqueryCorrespondingConjunct.containsKey(subquery)) {
- subqueryCorrespondingConjunct.replace(subquery,
- new
IsNull(subqueryCorrespondingConjunct.get(subquery)));
- }
+ private void updateSubqueryCorrespondingConjunct(Expression
expression) {
+ Preconditions.checkState(onlySingleSubquery(),
+ "onlySingleSubquery must be true");
+ subqueryCorrespondingConjunct
+ .forEach((k, v) -> subqueryCorrespondingConjunct.put(k,
expression));
}
- /**
- * For scalarSubQuery and MarkJoin, it will be replaced by
markSlotReference
- * e.g.
- * logicalFilter(predicate=k1 > scalarSub or exists)
- * -->
- * logicalFilter(predicate=$c$1 or $c$2)
- */
- private Expression replaceBinaryOperator(BinaryOperator binaryOperator,
- Expression left,
- Expression right,
- boolean isProject) {
- boolean leftContaionsScalar =
binaryOperator.left().containsType(ScalarSubquery.class);
- ScalarSubquery subquery =
collectScalarSubqueryForBinaryOperator(binaryOperator);
-
- // record the result in subqueryCorrespondingConjunct
- Expression newLeft = leftContaionsScalar &&
subqueryToMarkJoinSlot.get(subquery).isPresent()
- ? subqueryCorrespondingConjunct.get(subquery) : left;
- Expression newRight = !leftContaionsScalar &&
subqueryToMarkJoinSlot.get(subquery).isPresent()
- ? subqueryCorrespondingConjunct.get(subquery) : right;
- Expression newBinary = binaryOperator.withChildren(newLeft,
newRight);
- subqueryCorrespondingConjunct.put(subquery,
- (isProject ? (leftContaionsScalar ? newLeft : newRight) :
newBinary));
-
- if (subqueryToMarkJoinSlot.get(subquery).isPresent() &&
binaryOperator instanceof ComparisonPredicate) {
- return subqueryToMarkJoinSlot.get(subquery).get();
- }
- return newBinary;
- }
}
- private static ScalarSubquery
collectScalarSubqueryForBinaryOperator(BinaryOperator binaryOperator) {
- boolean leftContaionsScalar =
binaryOperator.left().containsType(ScalarSubquery.class);
- return leftContaionsScalar
- ? (ScalarSubquery) ((ImmutableSet) binaryOperator.left()
- .collect(ScalarSubquery.class::isInstance)).asList().get(0)
- : (ScalarSubquery) ((ImmutableSet) binaryOperator.right()
- .collect(ScalarSubquery.class::isInstance)).asList().get(0);
- }
}
diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
index d6eae9c01a..492b4c9545 100644
--- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
+++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out
@@ -161,6 +161,23 @@
22 3
24 4
+-- !scalar_subquery1 --
+
+-- !scalar_subquery2 --
+
+-- !in_subquery --
+1 abc 2 3 4
+1 abcd 3 3 4
+
+-- !exist_subquery --
+2 uvw 3 4 2
+2 uvw 3 4 2
+2 xyz 2 4 2
+
+-- !in_subquery --
+
+-- !exist_subquery --
+
-- !scalar_subquery_with_order --
20 2
22 3
diff --git
a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
index 665ad67e48..d1fe2d7a88 100644
--- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
+++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy
@@ -205,13 +205,24 @@ suite ("sub_query_correlated") {
"""
//----------complex subqueries----------
- //----------remove temporarily---------
- /*qt_scalar_subquery """
+ qt_scalar_subquery1 """
select * from sub_query_correlated_subquery1
where k1 = (select sum(k1) from sub_query_correlated_subquery3
where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1 and
sub_query_correlated_subquery3.v2 = 2)
order by k1, k2
"""
+ qt_scalar_subquery2 """
+ SELECT *
+ FROM sub_query_correlated_subquery1 t1
+ WHERE coalesce(bitand(
+ cast(
+ (SELECT sum(k1)
+ FROM sub_query_correlated_subquery3 ) AS int),
+ cast(t1.k1 AS int)),
+ coalesce(t1.k1, t1.k2)) is NULL
+ ORDER BY t1.k1, t1.k2;
+ """
+
qt_in_subquery """
select * from sub_query_correlated_subquery3
where (k1 = 1 or k1 = 2 or k1 = 3) and v1 in (select k1 from
sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k2 =
sub_query_correlated_subquery3.v2 and sub_query_correlated_subquery1.k1 = 3)
@@ -222,12 +233,10 @@ suite ("sub_query_correlated") {
select * from sub_query_correlated_subquery3
where k1 = 2 and exists (select * from
sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 =
sub_query_correlated_subquery3.v2 and sub_query_correlated_subquery1.k2 = 4)
order by k1, k2
- """*/
+ """
//----------complex nonEqual subqueries----------
-
- //----------remove temporarily---------
- /*qt_in_subquery """
+ qt_in_subquery """
select * from sub_query_correlated_subquery3
where (k1 = 1 or k1 = 2 or k1 = 3) and v1 in (select k1 from
sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k2 >
sub_query_correlated_subquery3.v2 and sub_query_correlated_subquery1.k1 = 3)
order by k1, k2
@@ -237,7 +246,7 @@ suite ("sub_query_correlated") {
select * from sub_query_correlated_subquery3
where k1 = 2 and exists (select * from
sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 <
sub_query_correlated_subquery3.v2 and sub_query_correlated_subquery1.k2 = 4)
order by k1, k2
- """*/
+ """
//----------subquery with order----------
order_qt_scalar_subquery_with_order """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]