This is an automated email from the ASF dual-hosted git repository.
huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2a2786fb8b2 [fix](nereids) fix bucket shuffle join to right compute
wrong output property (#47888)
2a2786fb8b2 is described below
commit 2a2786fb8b23132846a5bf2d9f7d924d0c37dd67
Author: 924060929 <[email protected]>
AuthorDate: Mon Feb 17 10:43:04 2025 +0800
[fix](nereids) fix bucket shuffle join to right compute wrong output
property (#47888)
fix Illegal bucket shuffle join or colocate join in fragment because
compute wrong join output property, introduced by #41730
the exception:
```
errCode = 2, detailMessage = Illegal bucket shuffle join or colocate join
in fragment
```
---
.../properties/ChildOutputPropertyDeriver.java | 26 ++++---
.../properties/ChildOutputPropertyDeriverTest.java | 12 +++-
.../distribute/bucket_shuffle_to_right.out | Bin 0 -> 192 bytes
.../distribute/bucket_shuffle_to_right.groovy | 78 +++++++++++++++++++++
4 files changed, 105 insertions(+), 11 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java
index 8dc77a43f50..ef89c327605 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriver.java
@@ -505,32 +505,42 @@ public class ChildOutputPropertyDeriver extends
PlanVisitor<PhysicalProperties,
return new PhysicalProperties(
DistributionSpecHash.merge(rightHashSpec,
leftHashSpec, outputShuffleType)
);
- } else {
+ } else if (shuffleSide == ShuffleSide.RIGHT || shuffleSide ==
ShuffleSide.NONE) {
+ return new PhysicalProperties(
+ DistributionSpecHash.merge(leftHashSpec,
rightHashSpec, outputShuffleType)
+ );
+ } else if (shuffleSide == ShuffleSide.BOTH) {
return new PhysicalProperties(
DistributionSpecHash.merge(leftHashSpec,
rightHashSpec, outputShuffleType)
+
.withShuffleTypeAndForbidColocateJoin(leftHashSpec.getShuffleType())
);
+ } else {
+ throw new AnalysisException("unknown shuffle side " +
shuffleSide);
}
case LEFT_SEMI_JOIN:
case LEFT_ANTI_JOIN:
case NULL_AWARE_LEFT_ANTI_JOIN:
case LEFT_OUTER_JOIN:
- if (shuffleSide == ShuffleSide.LEFT) {
+ if (shuffleSide == ShuffleSide.LEFT || shuffleSide ==
ShuffleSide.BOTH) {
return new PhysicalProperties(
leftHashSpec.withShuffleTypeAndForbidColocateJoin(outputShuffleType)
);
- } else {
+ } else if (shuffleSide == ShuffleSide.RIGHT || shuffleSide ==
ShuffleSide.NONE) {
return new PhysicalProperties(leftHashSpec);
+ } else {
+ throw new AnalysisException("unknown shuffle side " +
shuffleSide);
}
case RIGHT_SEMI_JOIN:
case RIGHT_ANTI_JOIN:
case RIGHT_OUTER_JOIN:
- if (JoinUtils.couldColocateJoin(leftHashSpec, rightHashSpec,
hashJoin.getHashJoinConjuncts())) {
+ if (shuffleSide == ShuffleSide.RIGHT || shuffleSide ==
ShuffleSide.BOTH) {
+ return new PhysicalProperties(
+
rightHashSpec.withShuffleTypeAndForbidColocateJoin(outputShuffleType)
+ );
+ } else if (shuffleSide == ShuffleSide.LEFT || shuffleSide ==
ShuffleSide.NONE) {
return new PhysicalProperties(rightHashSpec);
} else {
- // retain left shuffle type, since coordinator use left
most node to schedule fragment
- // forbid colocate join, since right table already shuffle
- return new
PhysicalProperties(rightHashSpec.withShuffleTypeAndForbidColocateJoin(
- leftHashSpec.getShuffleType()));
+ throw new AnalysisException("unknown shuffle side " +
shuffleSide);
}
case FULL_OUTER_JOIN:
return PhysicalProperties.createAnyFromHash(leftHashSpec,
rightHashSpec);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriverTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriverTest.java
index 8351b9f8c22..cc26ba2945b 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriverTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/ChildOutputPropertyDeriverTest.java
@@ -55,6 +55,7 @@ import org.apache.doris.nereids.types.TinyIntType;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.JoinUtils;
import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.qe.SessionVariable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@@ -390,11 +391,12 @@ class ChildOutputPropertyDeriverTest {
GroupExpression groupExpression = new GroupExpression(join);
new Group(null, groupExpression, null);
+ long leftTableId = 0L;
PhysicalProperties left = new PhysicalProperties(
new DistributionSpecHash(
Lists.newArrayList(new ExprId(0)),
ShuffleType.NATURAL,
- 0,
+ leftTableId,
Sets.newHashSet(0L)
),
new OrderSpec(
@@ -402,10 +404,11 @@ class ChildOutputPropertyDeriverTest {
true, true)))
);
+ long rightTableId = 1L;
PhysicalProperties right = new PhysicalProperties(new
DistributionSpecHash(
Lists.newArrayList(new ExprId(1)),
ShuffleType.NATURAL,
- 1,
+ rightTableId,
Sets.newHashSet(1L)
));
@@ -416,8 +419,11 @@ class ChildOutputPropertyDeriverTest {
Assertions.assertTrue(result.getOrderSpec().getOrderKeys().isEmpty());
Assertions.assertInstanceOf(DistributionSpecHash.class,
result.getDistributionSpec());
DistributionSpecHash actual = (DistributionSpecHash)
result.getDistributionSpec();
+
Assertions.assertEquals(ShuffleType.NATURAL, actual.getShuffleType());
- Assertions.assertEquals(-1, actual.getTableId());
+ Assertions.assertEquals(
+ SessionVariable.canUseNereidsDistributePlanner() ?
rightTableId : -1L, actual.getTableId()
+ );
// check merged
Assertions.assertEquals(1, actual.getExprIdToEquivalenceSet().size());
Assertions.assertEquals(1,
actual.getExprIdToEquivalenceSet().keySet().iterator().next().asInt());
diff --git
a/regression-test/data/nereids_syntax_p0/distribute/bucket_shuffle_to_right.out
b/regression-test/data/nereids_syntax_p0/distribute/bucket_shuffle_to_right.out
new file mode 100644
index 00000000000..b17e5bf2ec5
Binary files /dev/null and
b/regression-test/data/nereids_syntax_p0/distribute/bucket_shuffle_to_right.out
differ
diff --git
a/regression-test/suites/nereids_syntax_p0/distribute/bucket_shuffle_to_right.groovy
b/regression-test/suites/nereids_syntax_p0/distribute/bucket_shuffle_to_right.groovy
new file mode 100644
index 00000000000..6bd2ff14973
--- /dev/null
+++
b/regression-test/suites/nereids_syntax_p0/distribute/bucket_shuffle_to_right.groovy
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("bucket_shuffle_to_right") {
+ multi_sql """
+ set enable_nereids_distribute_planner=true;
+ set disable_join_reorder=true;
+
+ drop table if exists
table_20_undef_partitions2_keys3_properties4_distributed_by53;
+ create table
table_20_undef_partitions2_keys3_properties4_distributed_by53 (
+
`pk` int,
+
`col_varchar_10__undef_signed` varchar(10) ,
+
`col_int_undef_signed` int ,
+
`col_varchar_1024__undef_signed` varchar(1024)
+ ) engine=olap
+ DUPLICATE KEY(pk, col_varchar_10__undef_signed)
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_20_undef_partitions2_keys3_properties4_distributed_by53(pk,col_int_undef_signed,col_varchar_10__undef_signed,col_varchar_1024__undef_signed)
values
(0,null,"at",'b'),(1,null,'e',"your"),(2,6,'h',"look"),(3,3,'z',"to"),(4,6,'q',"did"),(5,7,"come",'g'),(6,null,'x',"his"),(7,null,'w','s'),(8,null,"don't",'l'),(9,null,"and","know"),(10,null,'q','c'),(11,null,'u','w'),(12,9,'c','x'),(13,null,"my","or"),(14,null,'a','i'),(15,null,"look",'u'),(16,2,"were","be"),(17,nul
[...]
+
+
+ drop table if exists
table_23_undef_partitions2_keys3_properties4_distributed_by52;
+ create table
table_23_undef_partitions2_keys3_properties4_distributed_by52 (
+
`pk` int,
+
`col_int_undef_signed` int ,
+
`col_varchar_10__undef_signed` varchar(10) ,
+
`col_varchar_1024__undef_signed` varchar(1024) MIN
+ ) engine=olap
+ AGGREGATE KEY(pk, col_int_undef_signed, col_varchar_10__undef_signed)
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_23_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_varchar_10__undef_signed,col_varchar_1024__undef_signed)
values
(0,null,"to","so"),(1,null,'u','j'),(2,null,"say","would"),(3,null,'t',"to"),(4,0,"your",'q'),(5,1,'w',"if"),(6,null,"right",'p'),(7,7,'h',"her"),(8,6,"that",'v'),(9,5,'k',"as"),(10,null,"know","did"),(11,9,"to",'q'),(12,null,"look","don't"),(13,9,"say",'v'),(14,null,'m','j'),(15,9,"i","want"),(16,4,"then","why"),(17
[...]
+
+
+ drop table if exists
table_100_undef_partitions2_keys3_properties4_distributed_by5;
+ create table
table_100_undef_partitions2_keys3_properties4_distributed_by5 (
+
`col_int_undef_signed` int/*agg_type_placeholder*/ ,
+
`col_varchar_10__undef_signed` varchar(10)/*agg_type_placeholder*/ ,
+
`col_varchar_1024__undef_signed` varchar(1024)/*agg_type_placeholder*/
,
+
`pk` int/*agg_type_placeholder*/
+ ) engine=olap
+
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_100_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_varchar_10__undef_signed,col_varchar_1024__undef_signed)
values
(0,null,"when","yes"),(1,null,"do",'i'),(2,1,"all","didn't"),(3,null,"don't","who"),(4,9,"your","it"),(5,5,'n','c'),(6,0,"up","it's"),(7,9,'d','a'),(8,3,"yeah",'v'),(9,null,'r','s'),(10,5,'s','n'),(11,null,'w','l'),(12,null,'k',"she"),(13,1,"from","what"),(14,1,'t',"at"),(15,null,"something",'s'),(16,null,'q',"his"),
[...]
+ """
+
+ order_qt_bucket_shuffle_to_right """
+ select alias1 . `col_int_undef_signed` AS field1
+ from table_100_undef_partitions2_keys3_properties4_distributed_by5 as
alias3
+ right outer join
+ (
+ select alias1 . `pk`, alias1 . `col_int_undef_signed`
+ from table_23_undef_partitions2_keys3_properties4_distributed_by52
as alias2
+ right outer join
+ table_20_undef_partitions2_keys3_properties4_distributed_by53 as
alias1
+ on alias1 . `pk` = alias2 . `col_int_undef_signed`
+ ) alias1 ON alias1 . `pk` = alias3 . `pk`
+ WHERE alias1 . `pk` >= 0;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]