This is an automated email from the ASF dual-hosted git repository.
zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new ea7fb17053 [CALCITE-7318] Execution fails when the JOIN ON condition
contains references to columns from both the left and right sides
ea7fb17053 is described below
commit ea7fb170532de95d02486922b2601e09b855631c
Author: Zhen Chen <[email protected]>
AuthorDate: Thu Mar 19 08:50:00 2026 +0800
[CALCITE-7318] Execution fails when the JOIN ON condition contains
references to columns from both the left and right sides
---
.../calcite/rel/rules/SubQueryRemoveRule.java | 56 ++++++++++++++++++++--
.../org/apache/calcite/test/RelOptRulesTest.java | 2 +-
.../org/apache/calcite/test/RelOptRulesTest.xml | 15 ++++++
core/src/test/resources/sql/sub-query.iq | 17 +++++++
4 files changed, 85 insertions(+), 5 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index a1b59e952f..43d5adc27e 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -1050,10 +1050,58 @@ private static void matchJoin(SubQueryRemoveRule rule,
RelOptRuleCall call) {
boolean inputIntersectsRightSide =
inputSet.intersects(ImmutableBitSet.range(nFieldsLeft, nFieldsLeft +
nFieldsRight));
if (inputIntersectsLeftSide && inputIntersectsRightSide) {
- // The current existential rewrite needs to make join with one side of
the origin join and
- // generate a new condition to replace the on clause. But for RexNode
whose operands are
- // on either side of the join, we can't push them into join. So this
rewriting is not
- // supported.
+ if (join.getJoinType() != JoinRelType.INNER) {
+ // Rewriting requires flattening the join into a cross-product first
(see below).
+ // That transformation is only valid for INNER JOIN (A JOIN B ON c ≡
(A × B) WHERE c).
+ // For OUTER JOINs the semantics differ (NULL-padding), so we bail out.
+ return;
+ }
+
+ // The sub-query operands span both sides of the join, e.g.:
+ // SELECT empno FROM emp JOIN dept
+ // ON emp.deptno + dept.deptno >= SOME(SELECT deptno FROM dept)
+ //
+ // Because the sub-query references fields from both the left (emp) and
right (dept)
+ // inputs, we cannot attach the sub-query rewrite to either side alone.
+ // The logic is to exploit the INNER JOIN equivalence:
+ // L INNER JOIN R ON cond ≡ (L CROSS JOIN R) WHERE cond
+
+ // Step 1 – flatten to a cross-product so every field is visible in one
relation:
+ //
+ // Before: After (builder stack):
+ // LogicalJoin(INNER, cond) → LogicalJoin(INNER, true) ←
cross-product
+ // LogicalTableScan(EMP) LogicalTableScan(EMP)
+ // LogicalTableScan(DEPT) LogicalTableScan(DEPT)
+ builder.push(join.getLeft());
+ builder.push(join.getRight());
+ builder.join(JoinRelType.INNER, builder.literal(true));
+
+ // Step 2 – expand the sub-query against the flattened relation
(rule.apply).
+ // The sub-query rewrite pushes an auxiliary relation onto the stack,
e.g. for SOME:
+ //
+ // LogicalJoin(INNER, true) ← sub-query auxiliary join
+ // LogicalJoin(INNER, true) ← cross-product from step 1
+ // LogicalTableScan(EMP)
+ // LogicalTableScan(DEPT)
+ // LogicalAggregate(m, c, d) ← aggregate over sub-query
+ // LogicalTableScan(DEPT)
+ final int count = builder.peek().getRowType().getFieldCount();
+ final RelOptUtil.Logic logic =
+ LogicVisitor.find(RelOptUtil.Logic.TRUE,
ImmutableList.of(join.getCondition()), e);
+ final RexNode target =
+ rule.apply(e, variablesSet, logic, builder, 1, count, 0);
+
+ // Step 3 – replace the sub-query in the ON condition with the
expression (target)
+ // produced by rule.apply (e.g. a CASE expression for SOME, IS NOT
NULL for EXISTS),
+ // then materialize it as a Filter (valid because the cross-product is
already in place).
+ final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+ final RexNode newCond = shuttle.apply(join.getCondition());
+ builder.filter(newCond);
+
+ // Step 4 – project away the auxiliary columns introduced by the
sub-query rewrite,
+ // restoring the original output schema (fields 0 .. count-1).
+ builder.project(fields(builder, count));
+ call.transformTo(builder.build());
return;
}
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 98beae87c7..5b9b9d8aba 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -10427,7 +10427,7 @@ public interface Config extends RelRule.Config {
+ "emp.deptno + dept.deptno >= SOME(SELECT deptno FROM dept)";
sql(sql)
.withRule(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE)
- .checkUnchanged();
+ .check();
}
/** Test case for
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index a16c920095..9cb281d0a5 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -8737,6 +8737,21 @@ LogicalProject(DEPTNO=[$0])
})], joinType=[inner])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0])
+ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+ LogicalFilter(condition=[OR(AND(IS TRUE(>=(+($7, $9), $11)), <>($12, 0)),
AND(>($12, $13), null, <>($12, 0), IS NOT TRUE(>=(+($7, $9), $11))),
AND(>=(+($7, $9), $11), <>($12, 0), IS NOT TRUE(>=(+($7, $9), $11)), <=($12,
$13)))])
+ LogicalJoin(condition=[true], joinType=[inner])
+ LogicalJoin(condition=[true], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalProject(m=[$0], c=[$1], d=[$1])
+ LogicalAggregate(group=[{}], m=[MIN($0)], c=[COUNT()])
+ LogicalProject(DEPTNO=[$0])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
diff --git a/core/src/test/resources/sql/sub-query.iq
b/core/src/test/resources/sql/sub-query.iq
index ac2fe6d8aa..b9d4774cef 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -9070,6 +9070,23 @@ where exists (
!ok
+# [CALCITE-7318] Execution fails when the JOIN ON condition contains
references to columns from both the left and right sides
+!use scott
+select Header.Name from ( VALUES (1, 'A'), (2, 'B')) as Header(Id, Name)
+join (values (11, 1), (12, 1), (21, 2)) as Version(Id, Parent)
+on not exists (select 1 from (values (11, 1), (12, 1), (21, 2)) as
Version2(Id, Parent)
+where Version2.Parent = Header.Id and Version2.Id > Version.Id);
++------+
+| NAME |
++------+
+| A |
+| A |
+| B |
++------+
+(3 rows)
+
+!ok
+
# [CALCITE-5132] Scalar IN subquery returns UNKNOWN instead of FALSE when key
is partially NULL.
# Case 1: Default insubquerythreshold=20
!if (use_old_decorr) {