This is an automated email from the ASF dual-hosted git repository.
morrySnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 69b81114a40 [fix](subquery)show user friendly message when meet
unsupported subquery (#64778)
69b81114a40 is described below
commit 69b81114a4012adfd643a968714454db7f1f6379
Author: starocean999 <[email protected]>
AuthorDate: Mon Jun 29 10:45:24 2026 +0800
[fix](subquery)show user friendly message when meet unsupported subquery
(#64778)
---
.../nereids/rules/analysis/SubExprAnalyzer.java | 68 ++++++-
.../rules/analysis/AnalyzeSubQueryTest.java | 66 +++++++
.../subquery/correlated_scalar_subquery.out | 63 ++++++
.../subquery/correlated_scalar_subquery.groovy | 219 +++++++++++++++++++++
4 files changed, 415 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
index 81cd3ee38ea..ad9a07995b6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubExprAnalyzer.java
@@ -38,7 +38,9 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation;
import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
+import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.ExpressionUtils;
@@ -97,6 +99,16 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
throw new AnalysisException("Unsupported correlated subquery with
a LIMIT clause with offset > 0 "
+ analyzedResult.getLogicalPlan());
}
+ // EXISTS over a top-level scalar aggregate (no GROUP BY) always
returns
+ // exactly one row. Fold to TRUE / FALSE immediately; this also avoids
+ // rejecting a valid query that happens to contain a set-operation
+ // underneath the aggregate, e.g.
+ // WHERE EXISTS (SELECT COUNT(*) FROM (... UNION ALL ...) u)
+ // because SubqueryToApply would have constant-folded it anyway.
+ if (hasTopLevelScalarAgg(analyzedResult)) {
+ return BooleanLiteral.of(!exists.isNot());
+ }
+ checkNoCorrelatedSlotsUnderSetOp(analyzedResult);
return new Exists(analyzedResult.getLogicalPlan(),
analyzedResult.getCorrelatedSlots(), exists.isNot());
}
@@ -114,6 +126,7 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
checkOutputColumn(analyzedResult.getLogicalPlan());
checkNoCorrelatedSlotsUnderAgg(analyzedResult);
+ checkNoCorrelatedSlotsUnderSetOp(analyzedResult);
checkRootIsLimit(analyzedResult);
return new InSubquery(
@@ -220,6 +233,14 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
}
}
+ private void checkNoCorrelatedSlotsUnderSetOp(AnalyzedResult
analyzedResult) {
+ if (analyzedResult.hasCorrelatedSlotsUnderSetOp()) {
+ throw new AnalysisException(
+ "Unsupported correlated subquery with set operation "
+ + analyzedResult.getLogicalPlan());
+ }
+ }
+
private void checkRootIsLimit(AnalyzedResult analyzedResult) {
if (!analyzedResult.isCorrelated()) {
return;
@@ -230,6 +251,29 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
}
}
+ /**
+ * Check whether the analyzed subquery plan has a top-level scalar
aggregate
+ * (aggregate without GROUP BY). Such an aggregate is guaranteed to return
+ * exactly one row regardless of its input, so EXISTS over it is always
TRUE
+ * and NOT EXISTS is always FALSE. Sorting the single row cannot change
+ * EXISTS semantics, so we also strip leading LogicalSort and
+ * LogicalSubQueryAlias wrappers (the latter appears during analysis before
+ * LogicalSubQueryAliasToLogicalProject is applied).
+ */
+ private boolean hasTopLevelScalarAgg(AnalyzedResult analyzedResult) {
+ LogicalPlan plan = analyzedResult.getLogicalPlan();
+ // Strip leading projects, sorts, and subquery-alias wrappers —
+ // analysis may wrap the aggregate in any of these.
+ while (plan instanceof LogicalProject || plan instanceof LogicalSort
+ || plan instanceof LogicalSubQueryAlias) {
+ plan = (LogicalPlan) plan.child(0);
+ }
+ if (plan instanceof LogicalAggregate) {
+ return ((LogicalAggregate<?>)
plan).getGroupByExpressions().isEmpty();
+ }
+ return false;
+ }
+
private AnalyzedResult analyzeSubquery(SubqueryExpr expr) {
if (cascadesContext == null) {
throw new IllegalStateException("Missing CascadesContext");
@@ -281,13 +325,19 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
ImmutableSet.copyOf(correlatedSlots),
LogicalAggregate.class);
}
+ public boolean hasCorrelatedSlotsUnderSetOp() {
+ return correlatedSlots.isEmpty() ? false
+ : hasCorrelatedSlotsUnderNode(logicalPlan,
+ ImmutableSet.copyOf(correlatedSlots),
LogicalSetOperation.class);
+ }
+
private static <T> boolean hasCorrelatedSlotsUnderNode(Plan rootPlan,
ImmutableSet<Slot> slots, Class<T> clazz) {
ArrayDeque<Plan> planQueue = new ArrayDeque<>();
planQueue.add(rootPlan);
while (!planQueue.isEmpty()) {
Plan plan = planQueue.poll();
- if (plan.getClass().equals(clazz)) {
+ if (clazz.isInstance(plan)) {
if (plan.containsSlots(slots)) {
return true;
}
@@ -358,6 +408,17 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
}
}
+ public PlanNodeCorrelatedInfo
visitLogicalOneRowRelation(LogicalOneRowRelation plan, Void context) {
+ boolean containCorrelatedSlots = findCorrelatedSlots(plan);
+ if (containCorrelatedSlots) {
+ throw new AnalysisException(
+ String.format("access outer query's column in project
is not supported",
+ correlatedSlots));
+ } else {
+ return new PlanNodeCorrelatedInfo(plan.getType(), false);
+ }
+ }
+
public PlanNodeCorrelatedInfo visitLogicalAggregate(LogicalAggregate
plan, Void context) {
boolean containCorrelatedSlots = findCorrelatedSlots(plan);
if (containCorrelatedSlots) {
@@ -433,6 +494,11 @@ class SubExprAnalyzer<T> extends
DefaultExpressionRewriter<T> {
case LOGICAL_JOIN:
throw new AnalysisException(
"access outer query's column before join
is not supported");
+ case LOGICAL_UNION:
+ case LOGICAL_INTERSECT:
+ case LOGICAL_EXCEPT:
+ throw new AnalysisException(
+ "access outer query's column before set
operation is not supported");
case LOGICAL_SORT:
// allow any sort node, the sort node will be
removed by ELIMINATE_ORDER_BY_UNDER_SUBQUERY
break;
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeSubQueryTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeSubQueryTest.java
index 83cf64031f6..45b28a25d41 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeSubQueryTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/AnalyzeSubQueryTest.java
@@ -267,6 +267,72 @@ public class AnalyzeSubQueryTest extends TestWithFeService
implements MemoPatter
Assertions.assertTrue(exception.getMessage().contains("limit is not
supported in correlated subquery"));
}
+ @Test
+ public void testExistsOverScalarAggUnionOrderBy() {
+ // EXISTS over scalar aggregate with ORDER BY wrapper and UNION ALL.
+ // hasTopLevelScalarAgg() must see through LogicalSort to fold to
TRUE/FALSE.
+ String sql = "SELECT EXISTS ("
+ + "SELECT COUNT(*) FROM ("
+ + "SELECT id FROM T1 UNION ALL SELECT id FROM T2"
+ + ") u ORDER BY 1"
+ + ") AS result";
+ PlanChecker.from(connectContext).analyze(sql);
+ }
+
+ @Test
+ public void testNotExistsOverScalarAggUnionOrderBy() {
+ String sql = "SELECT NOT EXISTS ("
+ + "SELECT COUNT(*) FROM ("
+ + "SELECT id FROM T1 UNION ALL SELECT id FROM T2"
+ + ") u ORDER BY 1"
+ + ") AS result";
+ PlanChecker.from(connectContext).analyze(sql);
+ }
+
+ @Test
+ public void testExistsCorrelatedScalarAggUnionOrderBy() {
+ // Correlated EXISTS over scalar aggregate + UNION ALL + ORDER BY.
+ // Must fold to TRUE/FALSE before checkNoCorrelatedSlotsUnderSetOp().
+ String sql = "SELECT id FROM T1 t1 WHERE EXISTS ("
+ + "SELECT COUNT(*) FROM ("
+ + "SELECT id FROM T2 t2 WHERE t1.id = t2.id"
+ + " UNION ALL "
+ + "SELECT id FROM T3 t3 WHERE t1.id = t3.id"
+ + ") u ORDER BY 1"
+ + ") ORDER BY id";
+ PlanChecker.from(connectContext).analyze(sql);
+ }
+
+ @Test
+ public void testExistsOverScalarAggUnionDerivedTable() {
+ // EXISTS over a scalar aggregate wrapped in a derived-table alias:
+ // WHERE EXISTS (SELECT * FROM (SELECT COUNT(*) FROM (<union>) u) a)
+ // hasTopLevelScalarAgg() must see through LogicalSubQueryAlias to
fold.
+ String sql = "SELECT id FROM T1 t1 WHERE EXISTS ("
+ + "SELECT * FROM ("
+ + "SELECT COUNT(*) FROM ("
+ + "SELECT id FROM T2 t2 WHERE t1.id = t2.id"
+ + " UNION ALL "
+ + "SELECT id FROM T3 t3 WHERE t1.id = t3.id"
+ + ") u"
+ + ") a"
+ + ") ORDER BY id";
+ PlanChecker.from(connectContext).analyze(sql);
+ }
+
+ @Test
+ public void testExistsOverScalarAggUnionDerivedTableNotCorrelated() {
+ // Non-correlated variant of the derived-table shape.
+ String sql = "SELECT EXISTS ("
+ + "SELECT * FROM ("
+ + "SELECT COUNT(*) FROM ("
+ + "SELECT id FROM T1 UNION ALL SELECT id FROM T2"
+ + ") u"
+ + ") a"
+ + ") AS result";
+ PlanChecker.from(connectContext).analyze(sql);
+ }
+
private void checkScalarSubquerySlotNullable(String sql, boolean
outputNullable) {
Plan root = PlanChecker.from(connectContext)
.analyze(sql)
diff --git
a/regression-test/data/query_p0/subquery/correlated_scalar_subquery.out
b/regression-test/data/query_p0/subquery/correlated_scalar_subquery.out
index b95797f5fbc..5bacb559c03 100644
--- a/regression-test/data/query_p0/subquery/correlated_scalar_subquery.out
+++ b/regression-test/data/query_p0/subquery/correlated_scalar_subquery.out
@@ -138,3 +138,66 @@
4
5
+-- !exists_over_scalar_agg_union --
+true
+
+-- !not_exists_over_scalar_agg_union --
+false
+
+-- !exists_correlated_scalar_agg_union --
+\N
+\N
+\N
+1
+1
+1
+2
+2
+3
+3
+20
+22
+24
+
+-- !exists_over_scalar_agg_union_orderby --
+true
+
+-- !not_exists_over_scalar_agg_union_orderby --
+false
+
+-- !exists_correlated_scalar_agg_union_orderby --
+\N
+\N
+\N
+1
+1
+1
+2
+2
+3
+3
+20
+22
+24
+
+-- !exists_correlated_scalar_agg_union_derived --
+\N
+\N
+\N
+1
+1
+1
+2
+2
+3
+3
+20
+22
+24
+
+-- !exists_scalar_agg_union_derived --
+true
+
+-- !not_exists_scalar_agg_union_derived --
+false
+
diff --git
a/regression-test/suites/query_p0/subquery/correlated_scalar_subquery.groovy
b/regression-test/suites/query_p0/subquery/correlated_scalar_subquery.groovy
index 15fecd29274..2c9c4763622 100644
--- a/regression-test/suites/query_p0/subquery/correlated_scalar_subquery.groovy
+++ b/regression-test/suites/query_p0/subquery/correlated_scalar_subquery.groovy
@@ -256,8 +256,227 @@ suite("correlated_scalar_subquery") {
exception "access outer query column"
}
+ test {
+ sql """
+ SELECT correlated_scalar_t1.c1 FROM correlated_scalar_t1 WHERE
correlated_scalar_t1.c1 = (SELECT correlated_scalar_t1.c1);
+ """
+ exception "access outer query's column in project is not supported"
+ }
+
+ test {
+ sql """
+ SELECT o.c1,
+ (SELECT COUNT(*) FROM (
+ SELECT i.c1 AS x FROM correlated_scalar_t2 i WHERE
i.c1 = o.c1
+ UNION ALL
+ SELECT i.c1 AS x FROM correlated_scalar_t2 i WHERE
i.c1 = o.c1
+ ) u) AS c
+ FROM correlated_scalar_t1 o;
+ """
+ exception "access outer query's column before set operation is not
supported"
+ }
+
+ test {
+ sql """
+ SELECT o.c1,
+ (SELECT COUNT(*) FROM (
+ SELECT i.c1 AS x FROM correlated_scalar_t2 i WHERE
i.c1 = o.c1
+ INTERSECT
+ SELECT i.c1 AS x FROM correlated_scalar_t2 i WHERE
i.c1 = o.c1
+ ) u) AS c
+ FROM correlated_scalar_t1 o;
+ """
+ exception "access outer query's column before set operation is not
supported"
+ }
+
+ test {
+ sql """
+ SELECT o.c1,
+ (SELECT COUNT(*) FROM (
+ SELECT i.c1 AS x FROM correlated_scalar_t2 i WHERE
i.c1 = o.c1
+ EXCEPT
+ SELECT i.c1 AS x FROM correlated_scalar_t2 i WHERE
i.c1 = o.c1
+ ) u) AS c
+ FROM correlated_scalar_t1 o;
+ """
+ exception "access outer query's column before set operation is not
supported"
+ }
+
+ test {
+ sql """
+ SELECT c1 FROM correlated_scalar_t1 WHERE c1 IN (
+ SELECT c1 FROM correlated_scalar_t2 WHERE
correlated_scalar_t1.c2 = correlated_scalar_t2.c2
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t3
+ );
+ """
+ exception "Unsupported correlated subquery with set operation"
+ }
+
+ test {
+ sql """
+ SELECT c1 FROM correlated_scalar_t1 WHERE c1 IN (
+ SELECT c1 FROM correlated_scalar_t2 WHERE
correlated_scalar_t1.c2 = correlated_scalar_t2.c2
+ INTERSECT
+ SELECT c1 FROM correlated_scalar_t3
+ );
+ """
+ exception "Unsupported correlated subquery with set operation"
+ }
+
+ test {
+ sql """
+ SELECT c1 FROM correlated_scalar_t1 WHERE c1 IN (
+ SELECT c1 FROM correlated_scalar_t2 WHERE
correlated_scalar_t1.c2 = correlated_scalar_t2.c2
+ EXCEPT
+ SELECT c1 FROM correlated_scalar_t3
+ );
+ """
+ exception "Unsupported correlated subquery with set operation"
+ }
+
+ test {
+ sql """
+ SELECT c1 FROM correlated_scalar_t1 WHERE EXISTS (
+ SELECT c1 FROM correlated_scalar_t2 WHERE
correlated_scalar_t1.c2 = correlated_scalar_t2.c2
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t3
+ );
+ """
+ exception "Unsupported correlated subquery with set operation"
+ }
+
+ test {
+ sql """
+ SELECT c1 FROM correlated_scalar_t1 WHERE EXISTS (
+ SELECT c1 FROM correlated_scalar_t2 WHERE
correlated_scalar_t1.c2 = correlated_scalar_t2.c2
+ INTERSECT
+ SELECT c1 FROM correlated_scalar_t3
+ );
+ """
+ exception "Unsupported correlated subquery with set operation"
+ }
+
+ test {
+ sql """
+ SELECT c1 FROM correlated_scalar_t1 WHERE EXISTS (
+ SELECT c1 FROM correlated_scalar_t2 WHERE
correlated_scalar_t1.c2 = correlated_scalar_t2.c2
+ EXCEPT
+ SELECT c1 FROM correlated_scalar_t3
+ );
+ """
+ exception "Unsupported correlated subquery with set operation"
+ }
+
qt_select_agg_project1 """select c2 from correlated_scalar_t1 where
correlated_scalar_t1.c2 > (select if(count(c1) = 0, 2, 100) from
correlated_scalar_t2 where correlated_scalar_t1.c1 = correlated_scalar_t2.c1)
order by c2;"""
qt_select_agg_project2 """select c2 from correlated_scalar_t1 where
correlated_scalar_t1.c2 = (select if(sum(c1) is null, 2, 100) from
correlated_scalar_t2 where correlated_scalar_t1.c1 = correlated_scalar_t2.c1)
order by c2;"""
qt_select_2_aggs """select c2 from correlated_scalar_t1 where
correlated_scalar_t1.c2 > (select count(c1) - min(c1) from correlated_scalar_t2
where correlated_scalar_t1.c1 = correlated_scalar_t2.c1) order by c2;"""
qt_select_3_aggs """select c2 from correlated_scalar_t1 where
correlated_scalar_t1.c2 > (select if(sum(c1) is null, count(c1), max(c2)) from
correlated_scalar_t2 where correlated_scalar_t1.c1 = correlated_scalar_t2.c1)
order by c2;"""
+
+ // EXISTS over a top-level scalar aggregate wraps a set operation:
+ // hasTopLevelScalarAgg() in SubExprAnalyzer folds EXISTS to TRUE and
+ // NOT EXISTS to FALSE before checkNoCorrelatedSlotsUnderSetOp() runs,
+ // because a scalar aggregate (no GROUP BY) always returns one row.
+ qt_exists_over_scalar_agg_union """
+ SELECT EXISTS (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t2
+ ) u
+ ) AS result
+ """
+ qt_not_exists_over_scalar_agg_union """
+ SELECT NOT EXISTS (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t2
+ ) u
+ ) AS result
+ """
+ // Correlated EXISTS over scalar agg + UNION: the outer query references
+ // are inside the UNION branches, but the aggregate still guarantees one
+ // row. This shape must not throw "Unsupported correlated subquery with
+ // set operation" — the scalar aggregate fold happens first.
+ qt_exists_correlated_scalar_agg_union """
+ SELECT c1 FROM correlated_scalar_t1 t1 WHERE EXISTS (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t2 t2 WHERE t1.c1 = t2.c1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t3 t3 WHERE t1.c1 = t3.c1
+ ) u
+ ) ORDER BY c1
+ """
+
+ // EXISTS over a scalar aggregate with ORDER BY wrapper — the sort cannot
+ // change EXISTS semantics and hasTopLevelScalarAgg() must see through it.
+ // This shape must not throw "Unsupported correlated subquery with set
+ // operation" or "Unsupported correlated subquery with a LIMIT clause".
+ // Non-correlated variant: EXISTS (SELECT COUNT(*) FROM (... UNION ...) u
ORDER BY 1)
+ qt_exists_over_scalar_agg_union_orderby """
+ SELECT EXISTS (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t2
+ ) u ORDER BY 1
+ ) AS result
+ """
+ qt_not_exists_over_scalar_agg_union_orderby """
+ SELECT NOT EXISTS (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t2
+ ) u ORDER BY 1
+ ) AS result
+ """
+ // Correlated variant: ORDER BY over correlated scalar-agg + UNION.
+ qt_exists_correlated_scalar_agg_union_orderby """
+ SELECT c1 FROM correlated_scalar_t1 t1 WHERE EXISTS (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t2 t2 WHERE t1.c1 = t2.c1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t3 t3 WHERE t1.c1 = t3.c1
+ ) u ORDER BY 1
+ ) ORDER BY c1
+ """
+
+ // EXISTS over a scalar aggregate wrapped in a derived-table alias:
+ // WHERE EXISTS (SELECT * FROM (SELECT COUNT(*) FROM (<union>) u) a)
+ // hasTopLevelScalarAgg() must see through LogicalSubQueryAlias to fold.
+ qt_exists_correlated_scalar_agg_union_derived """
+ SELECT c1 FROM correlated_scalar_t1 t1 WHERE EXISTS (
+ SELECT * FROM (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t2 t2 WHERE t1.c1 = t2.c1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t3 t3 WHERE t1.c1 = t3.c1
+ ) u
+ ) a
+ ) ORDER BY c1
+ """
+ qt_exists_scalar_agg_union_derived """
+ SELECT EXISTS (
+ SELECT * FROM (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t2
+ ) u
+ ) a
+ ) AS result
+ """
+ qt_not_exists_scalar_agg_union_derived """
+ SELECT NOT EXISTS (
+ SELECT * FROM (
+ SELECT COUNT(*) FROM (
+ SELECT c1 FROM correlated_scalar_t1
+ UNION ALL
+ SELECT c1 FROM correlated_scalar_t2
+ ) u
+ ) a
+ ) AS result
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]