This is an automated email from the ASF dual-hosted git repository.

zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new ea7fb17053 [CALCITE-7318] Execution fails when the JOIN ON condition 
contains references to columns from both the left and right sides
ea7fb17053 is described below

commit ea7fb170532de95d02486922b2601e09b855631c
Author: Zhen Chen <[email protected]>
AuthorDate: Thu Mar 19 08:50:00 2026 +0800

    [CALCITE-7318] Execution fails when the JOIN ON condition contains 
references to columns from both the left and right sides
---
 .../calcite/rel/rules/SubQueryRemoveRule.java      | 56 ++++++++++++++++++++--
 .../org/apache/calcite/test/RelOptRulesTest.java   |  2 +-
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 15 ++++++
 core/src/test/resources/sql/sub-query.iq           | 17 +++++++
 4 files changed, 85 insertions(+), 5 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index a1b59e952f..43d5adc27e 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -1050,10 +1050,58 @@ private static void matchJoin(SubQueryRemoveRule rule, 
RelOptRuleCall call) {
     boolean inputIntersectsRightSide =
         inputSet.intersects(ImmutableBitSet.range(nFieldsLeft, nFieldsLeft + 
nFieldsRight));
     if (inputIntersectsLeftSide && inputIntersectsRightSide) {
-      // The current existential rewrite needs to make join with one side of 
the origin join and
-      // generate a new condition to replace the on clause. But for RexNode 
whose operands are
-      // on either side of the join, we can't push them into join. So this 
rewriting is not
-      // supported.
+      if (join.getJoinType() != JoinRelType.INNER) {
+        // Rewriting requires flattening the join into a cross-product first 
(see below).
+        // That transformation is only valid for INNER JOIN (A JOIN B ON c ≡ 
(A × B) WHERE c).
+        // For OUTER JOINs the semantics differ (NULL-padding), so we bail out.
+        return;
+      }
+
+      // The sub-query operands span both sides of the join, e.g.:
+      //   SELECT empno FROM emp JOIN dept
+      //   ON emp.deptno + dept.deptno >= SOME(SELECT deptno FROM dept)
+      //
+      // Because the sub-query references fields from both the left (emp) and 
right (dept)
+      // inputs, we cannot attach the sub-query rewrite to either side alone.
+      // The logic is to exploit the INNER JOIN equivalence:
+      //   L INNER JOIN R ON cond  ≡  (L CROSS JOIN R) WHERE cond
+
+      // Step 1 – flatten to a cross-product so every field is visible in one 
relation:
+      //
+      //   Before:                        After (builder stack):
+      //   LogicalJoin(INNER, cond)   →   LogicalJoin(INNER, true)   ← 
cross-product
+      //     LogicalTableScan(EMP)          LogicalTableScan(EMP)
+      //     LogicalTableScan(DEPT)         LogicalTableScan(DEPT)
+      builder.push(join.getLeft());
+      builder.push(join.getRight());
+      builder.join(JoinRelType.INNER, builder.literal(true));
+
+      // Step 2 – expand the sub-query against the flattened relation 
(rule.apply).
+      //   The sub-query rewrite pushes an auxiliary relation onto the stack, 
e.g. for SOME:
+      //
+      //   LogicalJoin(INNER, true)                ← sub-query auxiliary join
+      //     LogicalJoin(INNER, true)              ← cross-product from step 1
+      //       LogicalTableScan(EMP)
+      //       LogicalTableScan(DEPT)
+      //     LogicalAggregate(m, c, d)             ← aggregate over sub-query
+      //       LogicalTableScan(DEPT)
+      final int count = builder.peek().getRowType().getFieldCount();
+      final RelOptUtil.Logic logic =
+          LogicVisitor.find(RelOptUtil.Logic.TRUE, 
ImmutableList.of(join.getCondition()), e);
+      final RexNode target =
+          rule.apply(e, variablesSet, logic, builder, 1, count, 0);
+
+      // Step 3 – replace the sub-query in the ON condition with the 
expression (target)
+      //   produced by rule.apply (e.g. a CASE expression for SOME, IS NOT 
NULL for EXISTS),
+      //   then materialize it as a Filter (valid because the cross-product is 
already in place).
+      final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+      final RexNode newCond = shuttle.apply(join.getCondition());
+      builder.filter(newCond);
+
+      // Step 4 – project away the auxiliary columns introduced by the 
sub-query rewrite,
+      //   restoring the original output schema (fields 0 .. count-1).
+      builder.project(fields(builder, count));
+      call.transformTo(builder.build());
       return;
     }
 
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java 
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 98beae87c7..5b9b9d8aba 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -10427,7 +10427,7 @@ public interface Config extends RelRule.Config {
         + "emp.deptno + dept.deptno >= SOME(SELECT deptno FROM dept)";
     sql(sql)
         .withRule(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE)
-        .checkUnchanged();
+        .check();
   }
 
   /** Test case for
diff --git 
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml 
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index a16c920095..9cb281d0a5 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -8737,6 +8737,21 @@ LogicalProject(DEPTNO=[$0])
 })], joinType=[inner])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
     LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$9], NAME=[$10])
+    LogicalFilter(condition=[OR(AND(IS TRUE(>=(+($7, $9), $11)), <>($12, 0)), 
AND(>($12, $13), null, <>($12, 0), IS NOT TRUE(>=(+($7, $9), $11))), 
AND(>=(+($7, $9), $11), <>($12, 0), IS NOT TRUE(>=(+($7, $9), $11)), <=($12, 
$13)))])
+      LogicalJoin(condition=[true], joinType=[inner])
+        LogicalJoin(condition=[true], joinType=[inner])
+          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+          LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+        LogicalProject(m=[$0], c=[$1], d=[$1])
+          LogicalAggregate(group=[{}], m=[MIN($0)], c=[COUNT()])
+            LogicalProject(DEPTNO=[$0])
+              LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
     </Resource>
   </TestCase>
diff --git a/core/src/test/resources/sql/sub-query.iq 
b/core/src/test/resources/sql/sub-query.iq
index ac2fe6d8aa..b9d4774cef 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -9070,6 +9070,23 @@ where exists (
 
 !ok
 
+# [CALCITE-7318] Execution fails when the JOIN ON condition contains 
references to columns from both the left and right sides
+!use scott
+select Header.Name from ( VALUES (1, 'A'), (2, 'B')) as Header(Id, Name)
+join (values (11, 1), (12, 1), (21, 2)) as Version(Id, Parent)
+on not exists (select 1 from (values (11, 1), (12, 1), (21, 2)) as 
Version2(Id, Parent)
+where Version2.Parent = Header.Id and Version2.Id > Version.Id);
++------+
+| NAME |
++------+
+| A    |
+| A    |
+| B    |
++------+
+(3 rows)
+
+!ok
+
 # [CALCITE-5132] Scalar IN subquery returns UNKNOWN instead of FALSE when key 
is partially NULL.
 # Case 1: Default insubquerythreshold=20
 !if (use_old_decorr) {

Reply via email to