This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch auto-pick-47318-branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/auto-pick-47318-branch-2.1 by
this push:
new 8fa5209eb4b [fix](nereids)keep at least one hash output slot when
prune slots in hash join node (#47318)
8fa5209eb4b is described below
commit 8fa5209eb4b0bc88918cf176afe3a772be747df9
Author: starocean999 <[email protected]>
AuthorDate: Mon Feb 10 14:50:47 2025 +0800
[fix](nereids)keep at least one hash output slot when prune slots in hash
join node (#47318)
consider sql bellow:
```
SELECT
9
FROM
table_20_undef_partitions2_keys3_properties4_distributed_by5 AS
tbl_alias2
WHERE
(
NOT (
tbl_alias2.col_int_undef_signed NOT IN (
SELECT
8
FROM
table_50_undef_partitions2_keys3_properties4_distributed_by53
)
AND '2023-12-12' IN ('2023-12-19')
)
);
```
no columns from hash join node is needed, so the hash output slots are
empty. But BE would keep all columns from both table when hash output
slots are empty. So FE will keep at least one column in hash output
slots to let BE happy
---
.../glue/translator/PhysicalPlanTranslator.java | 12 ++
.../test_column_prune_in_hash_join.out | Bin 0 -> 255 bytes
.../test_column_prune_in_hash_join.groovy | 131 +++++++++++++++++++++
3 files changed, 143 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 92bc509955d..a72b3772338 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -1937,6 +1937,7 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
joinNode.setProjectList(projectionExprs);
// prune the hashOutputSlotIds
if (joinNode instanceof HashJoinNode) {
+ Set<SlotId> oldHashOutputSlotIds =
Sets.newHashSet(((HashJoinNode) joinNode).getHashOutputSlotIds());
((HashJoinNode) joinNode).getHashOutputSlotIds().clear();
Set<ExprId> requiredExprIds = Sets.newHashSet();
Set<SlotId> requiredOtherConjunctsSlotIdSet =
Sets.newHashSet();
@@ -1960,6 +1961,17 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
((HashJoinNode)
joinNode).addSlotIdToHashOutputSlotIds(slotId);
}
}
+ if (((HashJoinNode)
joinNode).getHashOutputSlotIds().isEmpty()) {
+ // In FE, if all columns are pruned, hash output slots are
empty.
+ // On the contrary, BE will keep all columns if hash
output slots are empty.
+ // Currently BE will keep this behavior in order to be
compatible with older planner.
+ // So we have to workaround this in FE by keeping at least
one slot in oldHashOutputSlotIds.
+ // TODO: Remove this code when old planner is deleted and
BE changes to be consistent with FE.
+ for (SlotId slotId : oldHashOutputSlotIds) {
+ ((HashJoinNode)
joinNode).addSlotIdToHashOutputSlotIds(slotId);
+ break;
+ }
+ }
}
return inputFragment;
}
diff --git
a/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out
b/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out
new file mode 100644
index 00000000000..9432b56120f
Binary files /dev/null and
b/regression-test/data/correctness_p0/test_column_prune_in_hash_join.out differ
diff --git
a/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy
b/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy
new file mode 100644
index 00000000000..4fcfc8eff4b
--- /dev/null
+++
b/regression-test/suites/correctness_p0/test_column_prune_in_hash_join.groovy
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_column_prune_in_hash_join") {
+ sql """
+ drop table if exists
table_20_undef_partitions2_keys3_properties4_distributed_by5;
+ """
+ sql """
+ drop table if exists
table_50_undef_partitions2_keys3_properties4_distributed_by53;
+ """
+
+ sql """
+ create table
table_20_undef_partitions2_keys3_properties4_distributed_by5 (
+ col_int_undef_signed int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_date_undef_signed date null ,
+ col_date_undef_signed_not_null date not null ,
+ col_varchar_5__undef_signed varchar(5) null ,
+ col_varchar_5__undef_signed_not_null varchar(5) not null ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed)
+ PARTITION BY RANGE(col_int_undef_signed) (
+ PARTITION p0 VALUES LESS THAN ('4'),
+ PARTITION p1 VALUES LESS THAN ('6'),
+ PARTITION p2 VALUES LESS THAN ('7'),
+ PARTITION p3 VALUES LESS THAN ('8'),
+ PARTITION p4 VALUES LESS THAN ('10'),
+ PARTITION p5 VALUES LESS THAN ('83647'),
+ PARTITION p100 VALUES LESS THAN ('2147483647')
+ )
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ """
+
+ sql """
+ create table
table_50_undef_partitions2_keys3_properties4_distributed_by53 (
+ col_date_undef_signed date null ,
+ col_int_undef_signed int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_date_undef_signed_not_null date not null ,
+ col_varchar_5__undef_signed varchar(5) null ,
+ col_varchar_5__undef_signed_not_null varchar(5) not null ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_date_undef_signed, col_int_undef_signed)
+ PARTITION BY RANGE(col_date_undef_signed) (
+ PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+ PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+ PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+ PARTITION p3 VALUES LESS THAN ('2023-12-17'),
+ PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+ PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+ PARTITION p100 VALUES LESS THAN ('9999-12-31')
+ )
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ """
+
+ sql """insert into
table_20_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_5__undef_signed,col_varchar_5__undef_signed_not_null)
values
(0,6,7625942,null,'2023-12-13','e','c'),(1,4,9,'2023-12-12','2023-12-12','m','w'),(2,null,-17559,'2023-12-10','2023-12-11','p','t'),(3,-15839,8,'2023-12-12','2023-12-12','h','d'),(4,null,5578188,'2023-12-17','2023-12-14','c','i'
[...]
+ sql """insert into
table_50_undef_partitions2_keys3_properties4_distributed_by53(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_5__undef_signed,col_varchar_5__undef_signed_not_null)
values
(0,2,4,'2023-12-17','2023-12-14','q','t'),(1,3,4,'2023-12-16','2023-12-14','s','j'),(2,-1553,9,'2023-12-17','2004-01-22','w','x'),(3,7,-15007,'2023-12-09','2023-12-14','i','y'),(4,10788,16430,'2023-12-11','2006-08-11','g','f'),
[...]
+
+ qt_sql_prune_all_mark_join """
+ SELECT
+ 9
+ FROM
+ table_20_undef_partitions2_keys3_properties4_distributed_by5 AS
tbl_alias2
+ WHERE
+ (
+ NOT (
+ tbl_alias2.col_int_undef_signed NOT IN (
+ SELECT
+ 8
+ FROM
+
table_50_undef_partitions2_keys3_properties4_distributed_by53
+ )
+ AND '2023-12-12' IN ('2023-12-19')
+ )
+ );
+ """
+
+ qt_sql_prune_other_conjuncts """
+ SELECT
+ 9
+ FROM
+ table_20_undef_partitions2_keys3_properties4_distributed_by5
AS tbl_alias2
+ WHERE
+ (
+ NOT (
+ tbl_alias2.col_int_undef_signed NOT IN (
+ SELECT
+ 8
+ FROM
+
table_50_undef_partitions2_keys3_properties4_distributed_by53
+ )
+ )
+ );
+ """
+
+ qt_sql_prune_all """
+ SELECT
+ 9
+ FROM
+ table_20_undef_partitions2_keys3_properties4_distributed_by5
AS tbl_alias2
+ WHERE
+ (
+ tbl_alias2.col_int_undef_signed NOT IN (
+ SELECT
+ 8
+ FROM
+
table_50_undef_partitions2_keys3_properties4_distributed_by53
+ )
+ );
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]