Ian Bertolacci created CALCITE-7324:
---------------------------------------
Summary: NullPointerException from RelDecorrelator with multiple
subqueries where one has a complex correlated condition
Key: CALCITE-7324
URL: https://issues.apache.org/jira/browse/CALCITE-7324
Project: Calcite
Issue Type: Bug
Reporter: Ian Bertolacci
Example SQL (using TCP-H schema):
{code:Sql}
select
(select count(*) from PART where p_partkey = PARTSUPP.ps_partkey),
(select count(*) from SUPPLIER where s_suppkey = case when s_acctbal > 0
then PARTSUPP.ps_partkey + 1 else 1234 end)
from PARTSUPP
{code}
Exception:
{code}
Exception in thread "main" java.lang.NullPointerException
at java.base/java.util.Objects.requireNonNull(Objects.java:209)
at
org.apache.calcite.sql2rel.RelDecorrelator.createValueGenerator(RelDecorrelator.java:1272)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateInputWithValueGenerator(RelDecorrelator.java:1466)
at
org.apache.calcite.sql2rel.RelDecorrelator.maybeAddValueGenerator(RelDecorrelator.java:1385)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1595)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1560)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:569)
at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
at
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:614)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:596)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:569)
at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
at
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1640)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1615)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:569)
at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
at
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1192)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1175)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:569)
at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
at
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1192)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1175)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:569)
at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
at
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1192)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateRel(RelDecorrelator.java:1175)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:569)
at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:533)
at
org.apache.calcite.sql2rel.RelDecorrelator.getInvoke(RelDecorrelator.java:1013)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelate(RelDecorrelator.java:342)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateQuery(RelDecorrelator.java:261)
at
org.apache.calcite.sql2rel.RelDecorrelator.decorrelateQuery(RelDecorrelator.java:221)
at
com.github.zabetak.calcite.tutorial.RecursiveQueryIssueDemo.main(RecursiveQueryIssueDemo.java:170)
{code}
I demonstrated this in several calcite versions using the below (which I built
in the LuceneQueryProcessor project):
{code:java}
public static void main(String[] args) throws Exception {
String sqlQuery =
"SELECT (SELECT count(*) from PART where p_partkey = PARTSUPP.ps_partkey), " +
"(select count(*) from SUPPLIER where s_suppkey = case when s_acctbal > 0
then PARTSUPP.ps_partkey + 1 else 1234 end) " +
"FROM PARTSUPP";
System.out.println("[Input query]");
System.out.println(sqlQuery);
// Create the schema and table data types
CalciteSchema schema = CalciteSchema.createRootSchema(true);
RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
for (TpchTable table : TpchTable.values()) {
RelDataTypeFactory.Builder builder = new
RelDataTypeFactory.Builder(typeFactory);
for (TpchTable.Column column : table.columns) {
RelDataType type = typeFactory.createJavaType(column.type);
builder.add(column.name, type.getSqlTypeName()).nullable(true);
}
String indexPath = DatasetIndexer.INDEX_LOCATION + "/tpch/" + table.name();
LuceneTable lt = new LuceneTable(indexPath, builder.build());
schema.add(table.name(), new LuceneTable(indexPath, builder.build()));
}
// Configure and instantiate validator
Properties props = new Properties();
props.setProperty(CalciteConnectionProperty.CASE_SENSITIVE.camelName(),
"false");
CalciteConnectionConfig config = new CalciteConnectionConfigImpl(props);
CalciteCatalogReader catalogReader = new CalciteCatalogReader(schema,
Collections.singletonList("bs"),
typeFactory, config);
SqlValidator validator =
SqlValidatorUtil.newValidator(SqlStdOperatorTable.instance(),
catalogReader, typeFactory,
SqlValidator.Config.DEFAULT);
// Configure and instantiate the converter of the AST to Logical plan (requires
opt cluster)
RelOptCluster cluster = newCluster(typeFactory);
SqlToRelConverter relConverter = new SqlToRelConverter(
NOOP_EXPANDER,
validator,
catalogReader,
cluster,
StandardConvertletTable.INSTANCE,
SqlToRelConverter.config().withDecorrelationEnabled(false));
// Create an SQL parser
SqlParser parser = SqlParser.create(sqlQuery);
// Parse the query into an AST
SqlNode sqlNode = parser.parseQuery();
System.out.println("[Parsed query]");
System.out.println(sqlNode.toString());
// Validate the initial AST
SqlNode validNode = validator.validate(sqlNode);
// Convert the valid AST into a logical plan
RelNode logPlan = relConverter.convertQuery(validNode, false, true).rel;
// Display the logical plan
System.out.println(
RelOptUtil.dumpPlan("[Logical plan]", logPlan, SqlExplainFormat.TEXT,
SqlExplainLevel.NON_COST_ATTRIBUTES));
Program decomp =
Programs.hep(
ImmutableList.of(
CoreRules.FILTER_SUB_QUERY_TO_CORRELATE,
CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE,
CoreRules.JOIN_SUB_QUERY_TO_CORRELATE
),
false,
DefaultRelMetadataProvider.INSTANCE
);
RelNode afterToCorrelate = decomp.run(cluster.getPlanner(), logPlan,
logPlan.getTraitSet(), ImmutableList.of(), ImmutableList.of());
System.out.println(
RelOptUtil.dumpPlan("[Post to Correlate plan]", afterToCorrelate,
SqlExplainFormat.TEXT,
SqlExplainLevel.NON_COST_ATTRIBUTES));
RelNode decorrelated = RelDecorrelator.decorrelateQuery(
afterToCorrelate,
RelFactories.LOGICAL_BUILDER.create(cluster, catalogReader));
System.out.println(
RelOptUtil.dumpPlan("[Decorrelated plan]", decorrelated,
SqlExplainFormat.TEXT,
SqlExplainLevel.NON_COST_ATTRIBUTES));
{code}
Plan from sql->rel conversion:
{code}
[Logical plan]
LogicalProject(variablesSet=[[$cor0]], EXPR$0=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalFilter(condition=[=($0, $cor0.ps_partkey)])
LogicalTableScan(table=[[PART]])
})], EXPR$1=[$SCALAR_QUERY({
LogicalAggregate(group=[{}], EXPR$0=[COUNT()])
LogicalFilter(condition=[=($0, CASE(>($5, CAST(0):DOUBLE NOT NULL),
+($cor0.ps_partkey, 1), 1234))])
LogicalTableScan(table=[[SUPPLIER]])
})]), id = 17
LogicalTableScan(table=[[PARTSUPP]]), id = 1
{code}
Plan after applying the sub_query_to_correlate rules:
{code}
[Post to Correlate plan]
LogicalProject(EXPR$0=[$5], EXPR$1=[$6]), id = 41
LogicalCorrelate(correlation=[$cor0], joinType=[left],
requiredColumns=[{0}]), id = 39
LogicalCorrelate(correlation=[$cor0], joinType=[left],
requiredColumns=[{0}]), id = 28
LogicalTableScan(table=[[PARTSUPP]]), id = 1
LogicalAggregate(group=[{}], EXPR$0=[COUNT()]), id = 26
LogicalFilter(condition=[=($0, $cor0.ps_partkey)]), id = 24
LogicalTableScan(table=[[PART]]), id = 3
LogicalAggregate(group=[{}], EXPR$0=[COUNT()]), id = 37
LogicalFilter(condition=[=($0, CASE(>($5, CAST(0):DOUBLE NOT NULL),
+($cor0.ps_partkey, 1), 1234))]), id = 35
LogicalTableScan(table=[[SUPPLIER]]), id = 8
{code}
Whats very strange is that if I use a different CorrelateFactory in the
RelBuilder used in the decorrelation, no exception is produced, and
decorrelation is performed (though I have not checked the correctness:
{code:java}
RelNode decorrelated = RelDecorrelator.decorrelateQuery(
afterToCorrelate,
// Special builder with CorrelateFactory override
RelBuilder.proto(
Contexts.of(
new CorrelateFactory() {
// MyCorrelate which is a mirror of LogicalCorrelate
class MyCorrelate extends Correlate{
public MyCorrelate(
RelOptCluster cluster,
RelTraitSet traitSet,
List<RelHint> hints,
RelNode left,
RelNode right,
CorrelationId correlationId,
ImmutableBitSet requiredColumns,
JoinRelType joinType) {
super(
cluster,
traitSet,
hints,
left,
right,
correlationId,
requiredColumns,
joinType);
}
@Override public MyCorrelate copy(
RelTraitSet traitSet,
RelNode left, RelNode right, CorrelationId correlationId,
ImmutableBitSet requiredColumns, JoinRelType joinType
) {
return new MyCorrelate(getCluster(), traitSet, hints, left,
right, correlationId, requiredColumns, joinType);
}
@Override public RelNode accept(RelShuttle shuttle) {
return shuttle.visit(this);
}
}
public RelNode createCorrelate(RelNode left, RelNode right,
List<RelHint> hints,
CorrelationId correlationId, ImmutableBitSet requiredColumns,
JoinRelType joinType) {
// Mirror LogicalCorrelate.create
final RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE);
return new MyCorrelate(cluster, traitSet, hints, left, right,
correlationId,requiredColumns, joinType);
}
}
)
).create(cluster, catalogReader)
);
{code}
Producing this plan:
{code}
[Decorrelated plan]
LogicalProject(EXPR$0=[$5], EXPR$1=[CASE(IS NULL($7), 0:BIGINT, $7)]), id = 182
LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $6)], joinType=[left]), id =
180
LogicalProject(ps_partkey=[$0], ps_suppkey=[$1], ps_availqty=[$2],
ps_supplycost=[$3], ps_comment=[$4], EXPR$0=[CASE(IS NULL($6), 0:BIGINT, $6)]),
id = 139
LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $5)], joinType=[left]),
id = 137
LogicalTableScan(table=[[PARTSUPP]]), id = 1
LogicalProject(p_partkey=[$0], EXPR$0=[CASE(IS NOT NULL($2), $2, 0)]),
id = 135
LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)],
joinType=[left]), id = 133
LogicalAggregate(group=[{0}]), id = 124
LogicalTableScan(table=[[PARTSUPP]]), id = 1
LogicalAggregate(group=[{0}], EXPR$0=[COUNT()]), id = 131
LogicalProject(p_partkey=[$0]), id = 129
LogicalFilter(condition=[IS NOT NULL($0)]), id = 127
LogicalTableScan(table=[[PART]]), id = 3
LogicalProject(ps_partkey=[$0], EXPR$0=[CASE(IS NOT NULL($2), $2, 0)]), id
= 178
LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], joinType=[left]),
id = 176
LogicalAggregate(group=[{0}]), id = 160
LogicalProject(ps_partkey=[$0]), id = 158
LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $5)],
joinType=[left]), id = 156
LogicalTableScan(table=[[PARTSUPP]]), id = 1
LogicalProject(p_partkey=[$0], EXPR$0=[CASE(IS NOT NULL($2), $2,
0)]), id = 154
LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)],
joinType=[left]), id = 152
LogicalAggregate(group=[{0}]), id = 143
LogicalTableScan(table=[[PARTSUPP]]), id = 1
LogicalAggregate(group=[{0}], EXPR$0=[COUNT()]), id = 150
LogicalProject(p_partkey=[$0]), id = 148
LogicalFilter(condition=[IS NOT NULL($0)]), id = 146
LogicalTableScan(table=[[PART]]), id = 3
LogicalAggregate(group=[{0}], EXPR$0=[COUNT()]), id = 174
LogicalProject(ps_partkey=[$7]), id = 172
LogicalJoin(condition=[=($0, CASE(>($5, 0.0E0), +($7, 1), 1234))],
joinType=[inner]), id = 184
LogicalTableScan(table=[[SUPPLIER]]), id = 8
LogicalAggregate(group=[{0}]), id = 166
LogicalProject(ps_partkey=[$0]), id = 164
LogicalTableScan(table=[[PARTSUPP]]), id = 1
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)