suibianwanwan created CALCITE-7006:
--------------------------------------
Summary: Incorrect left join results with IS NOT DISTINCT FROM
under specific plan
Key: CALCITE-7006
URL: https://issues.apache.org/jira/browse/CALCITE-7006
Project: Calcite
Issue Type: Bug
Reporter: suibianwanwan
I forcibly applied the EnumerableNestedLoopJoin rule to make the execution
plans identical for {{=}} and {{{}is not distinct from{}}}. Here are test in
Quidem:
{code:java}
SELECT *
FROM (
SELECT EMPNO, COMM, COMM + 1000 AS e1
FROM EMP
) e_outer
LEFT JOIN (
SELECT e0, TRUE AS EXISTS_FLAG
FROM (
SELECT t1.EMPNO, t1.e1, t2.e0
FROM (
SELECT EMPNO, COMM + 100 AS e1, COMM
FROM EMP
) t1
JOIN (
SELECT COMM + 1000 AS e0
FROM EMP
GROUP BY COMM + 1000
) t2
ON t1.COMM IS NULL OR t1.e1 > t2.e0
) j
GROUP BY e0
) subq
ON e_outer.e1 = subq.e0;
EMPNO | COMM | E1 | E0 | EXISTS_FLAG
-------+---------+---------+---------+-------------
7369 | | | | null
7499 | 300.00 | 1300.00 | 1300.00 | true
7521 | 500.00 | 1500.00 | 1500.00 | true
7566 | | | | null
7654 | 1400.00 | 2400.00 | 2400.00 | true
7698 | | | | null
7782 | | | | null
7788 | | | | null
7839 | | | | null
7844 | 0.00 | 1000.00 | 1000.00 | true
7876 | | | | null
7900 | | | | null
7902 | | | | null
7934 | | | | null
(14 rows)
EnumerableNestedLoopJoin(condition=[=($2, $3)], joinType=[left])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)],
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
EnumerableAggregate(group=[{3}])
EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))],
joinType=[inner])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6,
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableAggregate(group=[{0}])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6,
$t8)], E0=[$t9])
EnumerableTableScan(table=[[scott, EMP]]) {code}
Replace = with is not distinct from:
{code:java}
SELECT *
FROM (
SELECT EMPNO, COMM, COMM + 1000 AS e1
FROM EMP
) e_outer
LEFT JOIN (
SELECT e0, TRUE AS EXISTS_FLAG
FROM (
SELECT t1.EMPNO, t1.e1, t2.e0
FROM (
SELECT EMPNO, COMM + 100 AS e1, COMM
FROM EMP
) t1
JOIN (
SELECT COMM + 1000 AS e0
FROM EMP
GROUP BY COMM + 1000
) t2
ON t1.COMM IS NULL OR t1.e1 > t2.e0
) j
GROUP BY e0
) subq
ON e_outer.e1 is not distinct from subq.e0;
EMPNO | COMM | E1 | E0 | EXISTS_FLAG
-------+---------+---------+----+-------------
7369 | | | | true
7499 | 300.00 | 1300.00 | | null
7521 | 500.00 | 1500.00 | | null
7566 | | | | true
7654 | 1400.00 | 2400.00 | | null
7698 | | | | true
7782 | | | | true
7788 | | | | true
7839 | | | | true
7844 | 0.00 | 1000.00 | | null
7876 | | | | true
7900 | | | | true
7902 | | | | true
7934 | | | | true
(14 rows)
EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($2, $3)],
joinType=[left])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)],
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
EnumerableAggregate(group=[{3}])
EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))],
joinType=[inner])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6,
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
EnumerableTableScan(table=[[scott, EMP]])
EnumerableAggregate(group=[{0}])
EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6,
$t8)], E0=[$t9])
EnumerableTableScan(table=[[scott, EMP]]) {code}
>From the plan, aside from IS NOT DISTINCT FROM, the plans are identical.
>However, the IS NOT DISTINCT FROM plan failed to join non-NULL data
>successfully. I tried simplifying the SQL but couldn't reproduce the issue.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)