This is an automated email from the ASF dual-hosted git repository.

asolimando pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 130c8dcacd [CALCITE-6824] Subquery in join conditions rewrite fails if 
referencing a column from the right-hand side table
130c8dcacd is described below

commit 130c8dcacd2a2e8e44aa37c3832db063af66dd0f
Author: suibianwanwan <[email protected]>
AuthorDate: Sun Feb 16 21:29:13 2025 +0800

    [CALCITE-6824] Subquery in join conditions rewrite fails if referencing a 
column from the right-hand side table
---
 .../calcite/rel/rules/SubQueryRemoveRule.java      | 60 ++++++++++++++---
 .../org/apache/calcite/test/RelOptRulesTest.java   | 39 +++++++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 78 ++++++++++++++++++++++
 core/src/test/resources/sql/sub-query.iq           | 72 ++++++++++++++++++++
 4 files changed, 239 insertions(+), 10 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 585d938d7f..9bf4e6526c 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -910,16 +910,56 @@ private static void matchJoin(SubQueryRemoveRule rule, 
RelOptRuleCall call) {
     final RelOptUtil.Logic logic =
         LogicVisitor.find(RelOptUtil.Logic.TRUE,
             ImmutableList.of(join.getCondition()), e);
-    builder.push(join.getLeft());
-    builder.push(join.getRight());
-    final int fieldCount = join.getRowType().getFieldCount();
-    final Set<CorrelationId>  variablesSet =
-        RelOptUtil.getVariablesUsed(e.rel);
-    final RexNode target =
-        rule.apply(e, variablesSet, logic, builder, 2, fieldCount, 0);
-    final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
-    builder.join(join.getJoinType(), shuttle.apply(join.getCondition()));
-    builder.project(fields(builder, join.getRowType().getFieldCount()));
+
+    ImmutableBitSet inputSet = RelOptUtil.InputFinder.bits(e.getOperands(), 
null);
+    int nFieldsLeft = join.getLeft().getRowType().getFieldCount();
+    int nFieldsRight = join.getRight().getRowType().getFieldCount();
+
+
+    boolean inputIntersectsLeftSide = 
inputSet.intersects(ImmutableBitSet.range(0, nFieldsLeft));
+    boolean inputIntersectsRightSide =
+        inputSet.intersects(ImmutableBitSet.range(nFieldsLeft, nFieldsLeft + 
nFieldsRight));
+    if (inputIntersectsLeftSide && inputIntersectsRightSide) {
+      // The current existential rewrite needs to make join with one side of 
the origin join and
+      // generate a new condition to replace the on clause. But for RexNode 
whose operands are
+      // on either side of the join, we can't push them into join. So this 
rewriting is not
+      // supported.
+      return;
+    }
+
+    final Set<CorrelationId> variablesSet = RelOptUtil.getVariablesUsed(e.rel);
+    if (inputIntersectsLeftSide) {
+      builder.push(join.getLeft());
+
+      final RexNode target =
+          rule.apply(e, variablesSet, logic, builder, 1, nFieldsLeft, 0);
+      final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+
+      final RexNode newCond =
+          shuttle.apply(
+              RexUtil.shift(join.getCondition(), nFieldsLeft,
+                  builder.fields().size() - nFieldsLeft));
+      builder.push(join.getRight());
+      builder.join(join.getJoinType(), newCond);
+
+      final int nFields = builder.fields().size();
+      ImmutableList<RexNode> fields =
+          builder.fields(ImmutableBitSet.range(0, nFieldsLeft)
+              .union(ImmutableBitSet.range(nFields - nFieldsRight, nFields)));
+      builder.project(fields);
+    } else {
+      builder.push(join.getLeft());
+      builder.push(join.getRight());
+
+      final int nFields = join.getRowType().getFieldCount();
+      final RexNode target =
+          rule.apply(e, variablesSet, logic, builder, 2, nFields, 0);
+      final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+
+      builder.join(join.getJoinType(), shuttle.apply(join.getCondition()));
+      builder.project(fields(builder, nFields));
+    }
+
     call.transformTo(builder.build());
   }
 
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java 
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index b02a929e67..61518e7701 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -9116,6 +9116,45 @@ public interface Config extends RelRule.Config {
         .check();
   }
 
+  /**
+   * Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6824";>[CALCITE-6824]
+   * Subquery in join conditions rewrite fails if referencing a column
+   * from the right-hand side table</a>. */
+  @Test void testJoinSubQueryRemoveRuleWithNotIn() {
+    final String sql = "SELECT empno FROM emp JOIN dept on "
+        + "emp.deptno not in (SELECT deptno FROM dept)";
+    sql(sql)
+        .withRule(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE)
+        .check();
+  }
+
+  /**
+   * Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6824";>[CALCITE-6824]
+   * Subquery in join conditions rewrite fails if referencing a column
+   * from the right-hand side table</a>. */
+  @Test void testJoinSubQueryRemoveRuleWithQuantifierSome() {
+    final String sql = "SELECT empno FROM emp JOIN dept on "
+        + "emp.deptno >= SOME(SELECT deptno FROM dept)";
+    sql(sql)
+        .withRule(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE)
+        .check();
+  }
+
+  /**
+   * Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6824";>[CALCITE-6824]
+   * Subquery in join conditions rewrite fails if referencing a column
+   * from the right-hand side table</a>. */
+  @Test void testJoinSubQueryRewriteWithBothSidesColumns() {
+    final String sql = "SELECT empno FROM emp JOIN dept on "
+        + "emp.deptno + dept.deptno >= SOME(SELECT deptno FROM dept)";
+    sql(sql)
+        .withRule(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE)
+        .checkUnchanged();
+  }
+
   /** Test case for
    * <a 
href="https://issues.apache.org/jira/browse/CALCITE-2295";>[CALCITE-2295]
    * Correlated SubQuery with Project will generate error plan</a>. */
diff --git 
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml 
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 60c683ee50..3d2f5bf0ee 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -6282,6 +6282,84 @@ LogicalProject(SAL=[$5])
               LogicalProject(SAL=[$5], $f9=[=($5, 4)])
                 LogicalFilter(condition=[AND(=($7, 20), >($5, 1000))])
                   LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testJoinSubQueryRemoveRuleWithNotIn">
+    <Resource name="sql">
+      <![CDATA[SELECT empno FROM emp JOIN dept on emp.deptno not in (SELECT 
deptno FROM dept)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalJoin(condition=[NOT(IN($7, {
+LogicalProject(DEPTNO=[$0])
+  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+}))], joinType=[inner])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$13], NAME=[$14])
+    LogicalJoin(condition=[OR(=($9, 0), AND(IS NULL($12), >=($10, $9)))], 
joinType=[inner])
+      LogicalJoin(condition=[=($7, $11)], joinType=[left])
+        LogicalJoin(condition=[true], joinType=[inner])
+          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+          LogicalProject(c=[$0], ck=[$0])
+            LogicalAggregate(group=[{}], c=[COUNT()])
+              LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+        LogicalProject(DEPTNO=[$0], i=[true])
+          LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testJoinSubQueryRemoveRuleWithQuantifierSome">
+    <Resource name="sql">
+      <![CDATA[SELECT empno FROM emp JOIN dept on emp.deptno >= SOME(SELECT 
deptno FROM dept)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalJoin(condition=[>= SOME($7, {
+LogicalProject(DEPTNO=[$0])
+  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+})], joinType=[inner])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$12], NAME=[$13])
+    LogicalJoin(condition=[CAST(OR(AND(IS TRUE(>=($7, $9)), <>($10, 0)), 
AND(>($10, $11), null, <>($10, 0), IS NOT TRUE(>=($7, $9))), AND(>=($7, $9), 
<>($10, 0), IS NOT TRUE(>=($7, $9)), <=($10, $11)))):BOOLEAN NOT NULL], 
joinType=[inner])
+      LogicalJoin(condition=[true], joinType=[inner])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+        LogicalProject(m=[$0], c=[$1], d=[$1])
+          LogicalAggregate(group=[{}], m=[MIN($0)], c=[COUNT()])
+            LogicalProject(DEPTNO=[$0])
+              LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testJoinSubQueryRewriteWithBothSidesColumns">
+    <Resource name="sql">
+      <![CDATA[SELECT empno FROM emp JOIN dept on emp.deptno + dept.deptno >= 
SOME(SELECT deptno FROM dept)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0])
+  LogicalJoin(condition=[>= SOME(+($7, $9), {
+LogicalProject(DEPTNO=[$0])
+  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+})], joinType=[inner])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
     </Resource>
   </TestCase>
diff --git a/core/src/test/resources/sql/sub-query.iq 
b/core/src/test/resources/sql/sub-query.iq
index 1bee162a1e..01bfadc4b8 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -405,6 +405,78 @@ EnumerableCalc(expr#0..4=[{inputs}], EMPNO=[$t0])
       EnumerableTableScan(table=[[scott, DEPT]])
 !plan
 
+# [CALCITE-6824] Subquery in join conditions rewrite fails if referencing a 
column from the right-hand side table
+select empno from "scott".emp where (empno not in (select dept.deptno from 
dept))
+in (select deptno = 0 from dept);
++-------+
+| EMPNO |
++-------+
++-------+
+(0 rows)
+
+!ok
+EnumerableCalc(expr#0..3=[{inputs}], EMPNO=[$t0])
+  EnumerableNestedLoopJoin(condition=[=(IS NULL($2), $3)], joinType=[inner])
+    EnumerableMergeJoin(condition=[=($0, $1)], joinType=[left])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0])
+        EnumerableTableScan(table=[[scott, EMP]])
+      EnumerableSort(sort0=[$0], dir0=[ASC])
+        EnumerableAggregate(group=[{0}], i=[LITERAL_AGG(true)])
+          EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT NOT 
NULL], DEPTNO=[$t3])
+            EnumerableTableScan(table=[[scott, DEPT]])
+    EnumerableAggregate(group=[{0}])
+      EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):INTEGER NOT 
NULL], expr#4=[0], expr#5=[=($t3, $t4)], EXPR$0=[$t5])
+        EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+
+# [CALCITE-6824] Subquery in join conditions rewrite fails if referencing a 
column from the right-hand side table
+SELECT empno FROM emp JOIN dept on emp.deptno <= ALL(SELECT deptno FROM dept) 
and emp.deptno = dept.deptno;
++-------+
+| EMPNO |
++-------+
+|  7782 |
+|  7839 |
+|  7934 |
++-------+
+(3 rows)
+
+!ok
+EnumerableCalc(expr#0..4=[{inputs}], EMPNO=[$t0])
+  EnumerableHashJoin(condition=[=($1, $5)], joinType=[semi])
+    EnumerableNestedLoopJoin(condition=[OR(=($3, 0), AND(<=($1, $2), IS NOT 
TRUE(OR(>($1, $2), >($3, $4)))))], joinType=[inner])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+        EnumerableTableScan(table=[[scott, EMP]])
+      EnumerableCalc(expr#0..1=[{inputs}], proj#0..1=[{exprs}], d=[$t1])
+        EnumerableAggregate(group=[{}], m=[MIN($0)], c=[COUNT()])
+          EnumerableTableScan(table=[[scott, DEPT]])
+    EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
+      EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+
+# [CALCITE-6824] Subquery in join conditions rewrite fails if referencing a 
column from the right-hand side table
+SELECT empno FROM emp JOIN dept on emp.deptno = (SELECT min(deptno) FROM dept) 
and emp.deptno = dept.deptno;
++-------+
+| EMPNO |
++-------+
+|  7782 |
+|  7839 |
+|  7934 |
++-------+
+(3 rows)
+
+!ok
+EnumerableCalc(expr#0..2=[{inputs}], EMPNO=[$t0])
+  EnumerableHashJoin(condition=[=($2, $3)], joinType=[semi])
+    EnumerableCalc(expr#0..2=[{inputs}], EMPNO=[$t1], DEPTNO=[$t2], 
EXPR$0=[$t0])
+      EnumerableHashJoin(condition=[=($0, $2)], joinType=[inner])
+        EnumerableAggregate(group=[{}], EXPR$0=[MIN($0)])
+          EnumerableTableScan(table=[[scott, DEPT]])
+        EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+          EnumerableTableScan(table=[[scott, EMP]])
+    EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
+      EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+
 # Correlated NOT IN sub-query in WHERE clause of JOIN
 select empno from "scott".emp as e
 join "scott".dept as d using (deptno)

Reply via email to