This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d0a8cd0fc5 [fix](nereids) dphyper join reorder may lost some join
conjuncts (#19318)
d0a8cd0fc5 is described below
commit d0a8cd0fc5ad8febe6afad8b7570a10b722c6419
Author: starocean999 <[email protected]>
AuthorDate: Wed May 10 19:02:35 2023 +0800
[fix](nereids) dphyper join reorder may lost some join conjuncts (#19318)
---
.../org/apache/doris/nereids/NereidsPlanner.java | 10 +-
.../hypergraph/receiver/PlanReceiver.java | 40 +++----
.../java/org/apache/doris/nereids/memo/Group.java | 8 +-
.../java/org/apache/doris/nereids/memo/Memo.java | 4 +-
.../functions_test/test_dpjoin_reorder.groovy | 126 +++++++++++++++++++++
5 files changed, 157 insertions(+), 31 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index e85babed40..7bf0339488 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -54,6 +54,7 @@ import org.apache.doris.planner.ScanNode;
import org.apache.doris.qe.ConnectContext;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.opentelemetry.api.trace.Span;
@@ -270,16 +271,23 @@ public class NereidsPlanner extends Planner {
private void dpHypOptimize() {
Group root = getRoot();
+ boolean changeRoot = false;
if (root.isInnerJoinGroup()) {
// If the root group is join group, DPHyp can change the root
group.
// To keep the root group is not changed, we add a project
operator above join
List<NamedExpression> outputs =
ImmutableList.copyOf(root.getLogicalExpression().getPlan().getOutput());
LogicalPlan plan = new LogicalProject<>(outputs,
root.getLogicalExpression().getPlan());
- CopyInResult copyInResult = cascadesContext.getMemo().copyIn(plan,
null, false);
+ CopyInResult copyInResult = cascadesContext.getMemo().copyIn(plan,
null, true);
root = copyInResult.correspondingExpression.getOwnerGroup();
+ Preconditions.checkArgument(copyInResult.generateNewExpression,
+ "the top project node can't be generated for
dpHypOptimize");
+ changeRoot = true;
}
cascadesContext.pushJob(new JoinOrderJob(root,
cascadesContext.getCurrentJobContext()));
cascadesContext.getJobScheduler().executeJobPool(cascadesContext);
+ if (changeRoot) {
+
cascadesContext.getMemo().setRoot(root.getLogicalExpression().child(0));
+ }
}
/**
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
index 892e963810..5f36971fd2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/joinorder/hypergraph/receiver/PlanReceiver.java
@@ -102,11 +102,7 @@ public class PlanReceiver implements AbstractReceiver {
Preconditions.checkArgument(planTable.containsKey(left));
Preconditions.checkArgument(planTable.containsKey(right));
- // check if the missed edges can be correctly connected by add it to
edges
- // if not, the plan is invalid because of the missed edges, just
return and seek for another valid plan
- if (!processMissedEdges(left, right, edges)) {
- return true;
- }
+ processMissedEdges(left, right, edges);
Memo memo = jobContext.getCascadesContext().getMemo();
emitCount += 1;
@@ -165,37 +161,27 @@ public class PlanReceiver implements AbstractReceiver {
return outputSlots;
}
- // check if the missed edges can be used to connect left and right
together with edges
- // return true if no missed edge or the missed edge can be used to connect
left and right
- // the returned edges includes missed edges if there is any.
- private boolean processMissedEdges(long left, long right, List<Edge>
edges) {
- boolean canAddMisssedEdges = true;
-
- // find all reference nodes assume left and right sub graph is
connected
+ // add any missed edge into edges to connect left and right
+ private void processMissedEdges(long left, long right, List<Edge> edges) {
+ // find all used edges
BitSet usedEdgesBitmap = new BitSet();
usedEdgesBitmap.or(usdEdges.get(left));
usedEdgesBitmap.or(usdEdges.get(right));
edges.forEach(edge -> usedEdgesBitmap.set(edge.getIndex()));
- long allReferenceNodes = getAllReferenceNodes(usedEdgesBitmap);
- // check all edges
- // the edge is a missed edge if the edge is not used and its reference
nodes is a subset of allReferenceNodes
+ // find all referenced nodes
+ long allReferenceNodes = LongBitmap.or(left, right);
+
+ // find the edge which is not in usedEdgesBitmap and its referenced
nodes is subset of allReferenceNodes
for (Edge edge : hyperGraph.getEdges()) {
- if (LongBitmap.isSubset(edge.getReferenceNodes(),
allReferenceNodes) && !usedEdgesBitmap.get(
- edge.getIndex())) {
- // check the missed edge can be used to connect left and right
together with edges
- // if the missed edge meet the 2 conditions, it is a valid edge
- // 1. the edge's left child's referenced nodes is subset of
the left
- // 2. the edge's original right node is subset of right
- canAddMisssedEdges = canAddMisssedEdges &&
LongBitmap.isSubset(edge.getLeft(),
- left) && LongBitmap.isSubset(edge.getOriginalRight(),
right);
-
- // always add the missed edge to edges
- // because the caller will return immediately if
canAddMisssedEdges is false
+ long referenceNodes =
+ LongBitmap.newBitmapUnion(edge.getOriginalLeft(),
edge.getOriginalRight());
+ if (LongBitmap.isSubset(referenceNodes, allReferenceNodes)
+ && !usedEdgesBitmap.get(edge.getIndex())) {
+ // add the missed edge to edges
edges.add(edge);
}
}
- return canAddMisssedEdges;
}
private long getAllReferenceNodes(BitSet edgesBitmap) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
index 5bd11c71e7..5bc32cb64f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
import org.apache.doris.nereids.cost.Cost;
import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
@@ -368,8 +369,11 @@ public class Group {
public boolean isInnerJoinGroup() {
Plan plan = getLogicalExpression().getPlan();
if (plan instanceof LogicalJoin) {
- // Right now, we only support inner join
- return ((LogicalJoin) plan).getJoinType() == JoinType.INNER_JOIN;
+ // Right now, we only support inner join with some join conditions
+ return ((LogicalJoin) plan).getJoinType() == JoinType.INNER_JOIN
+ && (((LogicalJoin) plan).getOtherJoinConjuncts().isEmpty()
+ || !(((LogicalJoin) plan).getOtherJoinConjuncts()
+ .get(0) instanceof Literal));
}
return false;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
index f8d7694dcc..bb7b540f78 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java
@@ -524,7 +524,9 @@ public class Memo {
}
public Group newGroup(LogicalProperties logicalProperties) {
- return new Group(groupIdGenerator.getNextId(), logicalProperties);
+ Group group = new Group(groupIdGenerator.getNextId(),
logicalProperties);
+ groups.put(group.getGroupId(), group);
+ return group;
}
// This function is used to copy new group expression
diff --git
a/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy
b/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy
new file mode 100644
index 0000000000..d30ddef991
--- /dev/null
+++
b/regression-test/suites/tpcds_sf1_p1/functions_test/test_dpjoin_reorder.groovy
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_dp_join_reorder") {
+ sql """set enable_nereids_planner=true"""
+ sql """set enable_dphyp_optimizer=true"""
+ sql """set enable_fallback_to_original_planner=false"""
+ sql "use regression_test_tpcds_sf1_p1"
+
+ explain {
+ sql("""
+ WITH
+ cs_ui AS (
+ SELECT
+ cs_item_sk
+ , sum(cs_ext_list_price) sale
+ , sum(((cr_refunded_cash + cr_reversed_charge) +
cr_store_credit)) refund
+ FROM
+ catalog_sales
+ , catalog_returns
+ WHERE (cs_item_sk = cr_item_sk)
+ AND (cs_order_number = cr_order_number)
+ GROUP BY cs_item_sk
+ HAVING (sum(cs_ext_list_price) > (2 *
sum(((cr_refunded_cash + cr_reversed_charge) + cr_store_credit))))
+ ),
+ cross_sales AS (
+ SELECT
+ i_product_name product_name
+ , i_item_sk item_sk
+ , s_store_name store_name
+ , s_zip store_zip
+ , ad1.ca_street_number b_street_number
+ , ad1.ca_street_name b_street_name
+ , ad1.ca_city b_city
+ , ad1.ca_zip b_zip
+ , ad2.ca_street_number c_street_number
+ , ad2.ca_street_name c_street_name
+ , ad2.ca_city c_city
+ , ad2.ca_zip c_zip
+ , d1.d_year syear
+ , d2.d_year fsyear
+ , d3.d_year s2year
+ FROM
+ store_sales
+ , store_returns
+ , cs_ui
+ , date_dim d1
+ , date_dim d2
+ , date_dim d3
+ , store
+ , customer
+ , customer_demographics cd1
+ , customer_demographics cd2
+ , promotion
+ , household_demographics hd1
+ , household_demographics hd2
+ , customer_address ad1
+ , customer_address ad2
+ , income_band ib1
+ , income_band ib2
+ , item
+ WHERE (ss_store_sk = s_store_sk)
+ AND (ss_sold_date_sk = d1.d_date_sk)
+ AND (ss_customer_sk = c_customer_sk)
+ AND (ss_cdemo_sk = cd1.cd_demo_sk)
+ AND (ss_hdemo_sk = hd1.hd_demo_sk)
+ AND (ss_addr_sk = ad1.ca_address_sk)
+ AND (ss_item_sk = i_item_sk)
+ AND (ss_item_sk = sr_item_sk)
+ AND (ss_ticket_number = sr_ticket_number)
+ AND (ss_item_sk = cs_ui.cs_item_sk)
+ AND (c_current_cdemo_sk = cd2.cd_demo_sk)
+ AND (c_current_hdemo_sk = hd2.hd_demo_sk)
+ AND (c_current_addr_sk = ad2.ca_address_sk)
+ AND (c_first_sales_date_sk = d2.d_date_sk)
+ AND (c_first_shipto_date_sk = d3.d_date_sk)
+ AND (ss_promo_sk = p_promo_sk)
+ AND (hd1.hd_income_band_sk = ib1.ib_income_band_sk)
+ AND (hd2.hd_income_band_sk = ib2.ib_income_band_sk)
+ AND (cd1.cd_marital_status <> cd2.cd_marital_status)
+ AND (i_color IN ('purple' , 'burlywood' , 'indian'
, 'spring' , 'floral' , 'medium'))
+ AND (i_current_price BETWEEN 64 AND (64 + 10))
+ AND (i_current_price BETWEEN (64 + 1) AND (64 + 15))
+ )
+ SELECT
+ cs1.product_name
+ , cs1.store_name
+ , cs1.store_zip
+ , cs1.b_street_number
+ , cs1.b_street_name
+ , cs1.b_city
+ , cs1.b_zip
+ , cs1.c_street_number
+ , cs1.c_street_name
+ , cs1.c_city
+ , cs1.c_zip
+ , cs1.syear
+ , cs2.syear
+ FROM
+ cross_sales cs1
+ , cross_sales cs2
+ , cross_sales cs3
+ WHERE (cs1.item_sk = cs2.item_sk)
+ AND (cs1.syear = 1999)
+ AND (cs2.syear = (1999 + 1))
+ AND (cs1.store_name = cs2.store_name)
+ AND (cs1.store_zip = cs2.store_zip)
+ AND (cs1.store_name = cs3.store_name)
+ AND (cs1.store_zip = cs3.store_zip);
+ """)
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]