silundong commented on code in PR #4204:
URL: https://github.com/apache/calcite/pull/4204#discussion_r1966974085
##########
core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java:
##########
@@ -9546,4 +9546,21 @@ private void
checkJoinAssociateRuleWithTopAlwaysTrueCondition(boolean allowAlway
.withRule(CoreRules.MULTI_JOIN_OPTIMIZE)
.check();
}
+
+ @Test void testDphypJoinReorder() {
Review Comment:
Sure, I will update it.
##########
core/src/main/java/org/apache/calcite/rel/rules/DpHyp.java:
##########
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * The core process of dphyp enumeration algorithm.
+ */
+public class DpHyp {
+
+ private HyperGraph hyperGraph;
+
+ private HashMap<Long, RelNode> dpTable;
+
+ private RelBuilder builder;
+
+ private RelMetadataQuery mq;
+
+ public DpHyp(HyperGraph hyperGraph, RelBuilder builder, RelMetadataQuery
relMetadataQuery) {
+ this.hyperGraph = hyperGraph;
+ this.dpTable = new HashMap<>();
+ this.builder = builder;
+ this.mq = relMetadataQuery;
+ }
+
+ public void startEnumerateJoin() {
+ int size = hyperGraph.getInputs().size();
+ for (int i = 0; i < size; i++) {
+ long singleNode = LongBitmap.newBitmap(i);
+ dpTable.put(singleNode, hyperGraph.getInput(i));
+ hyperGraph.initEdgeBitMap(singleNode);
+ }
+
+ // start enumerating from the second to last
+ for (int i = size - 2; i >= 0; i--) {
+ long csg = LongBitmap.newBitmap(i);
+ long forbidden = csg - 1;
+ emitCsg(csg);
+ enumerateCsgRec(csg, forbidden);
+ }
+ }
+
+ private void emitCsg(long csg) {
+ long forbidden = csg | LongBitmap.getBvBitmap(csg);
+ long neighbors = hyperGraph.getNeighborBitmap(csg, forbidden);
+
+ LongBitmap.ReverseIterator reverseIterator = new
LongBitmap.ReverseIterator(neighbors);
+ for (long cmp : reverseIterator) {
+ List<HyperEdge> edges = hyperGraph.connectCsgCmp(csg, cmp);
+ if (!edges.isEmpty()) {
+ emitCsgCmp(csg, cmp, edges);
+ }
+ // forbidden the nodes that smaller than current cmp when extend cmp,
e.g.
+ // neighbors = {t1, t2}, t1 and t2 are connected.
+ // when extented t2, we will get (t1, t2)
+ // when extented t1, we will get (t1, t2) repeated
+ long newForbidden =
+ (cmp | LongBitmap.getBvBitmap(cmp)) & neighbors;
+ newForbidden = newForbidden | forbidden;
+ enumerateCmpRec(csg, cmp, newForbidden);
+ }
+ }
+
+ private void enumerateCsgRec(long csg, long forbidden) {
+ long neighbors = hyperGraph.getNeighborBitmap(csg, forbidden);
+ LongBitmap.SubsetIterator subsetIterator = new
LongBitmap.SubsetIterator(neighbors);
+ for (long subNeighbor : subsetIterator) {
+ hyperGraph.updateEdgesForUnion(csg, subNeighbor);
+ long newCsg = csg | subNeighbor;
+ if (dpTable.containsKey(newCsg)) {
+ emitCsg(newCsg);
+ }
+ }
+ long newForbidden = forbidden | neighbors;
+ subsetIterator.reset();
+ for (long subNeighbor : subsetIterator) {
+ long newCsg = csg | subNeighbor;
+ enumerateCsgRec(newCsg, newForbidden);
+ }
+ }
+
+ private void enumerateCmpRec(long csg, long cmp, long forbidden) {
+ long neighbors = hyperGraph.getNeighborBitmap(cmp, forbidden);
+ LongBitmap.SubsetIterator subsetIterator = new
LongBitmap.SubsetIterator(neighbors);
+ for (long subNeighbor : subsetIterator) {
+ long newCmp = cmp | subNeighbor;
+ hyperGraph.updateEdgesForUnion(cmp, subNeighbor);
+ if (dpTable.containsKey(newCmp)) {
+ List<HyperEdge> edges = hyperGraph.connectCsgCmp(csg, newCmp);
+ if (!edges.isEmpty()) {
+ emitCsgCmp(csg, newCmp, edges);
+ }
+ }
+ }
+ long newForbidden = forbidden | neighbors;
+ subsetIterator.reset();
+ for (long subNeighbor : subsetIterator) {
+ long newCmp = cmp | subNeighbor;
+ enumerateCmpRec(csg, newCmp, newForbidden);
+ }
+ }
+
+ private void emitCsgCmp(long csg, long cmp, List<HyperEdge> edges) {
+ RelNode child1 = dpTable.get(csg);
+ RelNode child2 = dpTable.get(cmp);
+ if (child1 == null || child2 == null) {
+ throw new IllegalArgumentException(
+ "csg and cmp were not enumerated in the previous dp process");
+ }
+
+ JoinRelType joinType = hyperGraph.extractJoinType(edges);
+ if (joinType == null) {
+ return;
+ }
+ RexNode joinCond1 = hyperGraph.extractJoinCond(child1, child2, edges);
+ RelNode newPlan1 = builder
+ .push(child1)
+ .push(child2)
+ .join(joinType, joinCond1)
+ .build();
+
+ // swap left and right
+ RexNode joinCond2 = hyperGraph.extractJoinCond(child2, child1, edges);
+ RelNode newPlan2 = builder
+ .push(child2)
+ .push(child1)
+ .join(joinType, joinCond2)
+ .build();
+ RelNode winPlan = chooseBetterPlan(newPlan1, newPlan2);
+
+ RelNode oriPlan = dpTable.get(csg | cmp);
+ if (oriPlan != null) {
+ winPlan = chooseBetterPlan(winPlan, oriPlan);
+ }
+ dpTable.put(csg | cmp, winPlan);
+ }
+
+ public @Nullable RelNode getBestPlan() {
+ int size = hyperGraph.getInputs().size();
+ long wholeGraph = LongBitmap.newBitmapBetween(0, size);
+ return dpTable.get(wholeGraph);
+ }
+
+ private RelNode chooseBetterPlan(RelNode plan1, RelNode plan2) {
+ RelOptCost cost1 = mq.getCumulativeCost(plan1);
+ RelOptCost cost2 = mq.getCumulativeCost(plan2);
+ if (cost1 == null || cost2 == null) {
Review Comment:
I will update it.
##########
core/src/main/java/org/apache/calcite/rel/rules/DphypJoinReorderRule.java:
##########
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.rel.rules;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+
+import org.immutables.value.Value;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Rule that re-orders a {@link Join} tree using dphyp algorithm.
+ *
+ * @see CoreRules#HYPER_GRAPH_OPTIMIZE */
[email protected]
+public class DphypJoinReorderRule
+ extends RelRule<DphypJoinReorderRule.Config>
+ implements TransformationRule {
+
+ protected DphypJoinReorderRule(Config config) {
+ super(config);
+ }
+
+ @Override public void onMatch(RelOptRuleCall call) {
+ HyperGraph hyperGraph = call.rel(0);
+ RelBuilder relBuilder = call.builder();
+ // make all field name unique and convert the
+ // HyperEdge condition from RexInputRef to RexInputFieldName
+ hyperGraph.convertHyperEdgeCond();
+
+ // enumerate by Dphyp
+ DpHyp dpHyp = new DpHyp(hyperGraph, relBuilder, call.getMetadataQuery());
+ dpHyp.startEnumerateJoin();
+ RelNode orderedJoin = dpHyp.getBestPlan();
+ if (orderedJoin == null) {
+ return;
Review Comment:
My original intention is that when the query is too complex, even dphyp can
be very time-consuming. So, if the execution of `dpHyp.startEnumerateJoin`
exceeds a certain time limit, the enumeration process should be forcibly
terminated. In this case, the dp table may not have results yet. After
returning null, the hypergraph simplification algorithm might be executed.
Alternatively, the rules related to dphyp could be integrated into a single
method, with the original Join tree as the input parameter. If the optimization
times out, the original Join tree is returned, and other Join reorder
algorithms can be called subsequently.
Do you have any suggestions for this? I will update it accordingly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]