This is an automated email from the ASF dual-hosted git repository. chenyz pushed a commit to branch parallel_multi_child_node in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit c61ebba990f707e547c554c63c82601d1bd45835 Author: Chen YZ <[email protected]> AuthorDate: Mon Feb 24 17:34:32 2025 +0800 cherry pick --- .../operator/process/CollectOperator.java | 7 +++++++ .../SimpleFragmentParallelPlanner.java | 2 +- .../plan/planner/plan/PlanFragment.java | 22 +++++++++++++++++----- .../planner/distribute/AddExchangeNodes.java | 22 ++++++++-------------- .../distribute/TableDistributedPlanner.java | 12 +++++++++--- .../distribute/TableModelQueryFragmentPlanner.java | 17 +++++++++++++---- 6 files changed, 55 insertions(+), 27 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/CollectOperator.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/CollectOperator.java index 4b0ecf27c37..5c287332053 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/CollectOperator.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/operator/process/CollectOperator.java @@ -36,6 +36,7 @@ public class CollectOperator implements ProcessOperator { private final OperatorContext operatorContext; private final List<Operator> children; + private boolean inited = false; private int currentIndex; @@ -68,6 +69,12 @@ public class CollectOperator implements ProcessOperator { @Override public ListenableFuture<?> isBlocked() { + if (!inited) { + inited = true; + for (Operator child : children) { + child.isBlocked(); + } + } if (currentIndex >= children.size()) { return NOT_BLOCKED; } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/distribution/SimpleFragmentParallelPlanner.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/distribution/SimpleFragmentParallelPlanner.java index e955f5f5a22..a7fd5ef9671 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/distribution/SimpleFragmentParallelPlanner.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/distribution/SimpleFragmentParallelPlanner.java @@ -151,7 +151,7 @@ public class SimpleFragmentParallelPlanner implements IFragmentParallelPlaner { // Get the target region for origin PlanFragment, then its instance will be distributed one // of them. - TRegionReplicaSet regionReplicaSet = fragment.getTargetRegion(); + TRegionReplicaSet regionReplicaSet = fragment.getTargetRegionForTreeModel(); // Set ExecutorType and target host for the instance // We need to store all the replica host in case of the scenario that the instance need to be diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/plan/PlanFragment.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/plan/PlanFragment.java index c181dc2636a..44f8f23fda8 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/plan/PlanFragment.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/planner/plan/PlanFragment.java @@ -25,8 +25,10 @@ import org.apache.iotdb.commons.partition.DataPartition; import org.apache.iotdb.db.queryengine.common.PlanFragmentId; import org.apache.iotdb.db.queryengine.plan.analyze.TypeProvider; import org.apache.iotdb.db.queryengine.plan.planner.SubPlanTypeExtractor; +import org.apache.iotdb.db.queryengine.plan.planner.distribution.NodeDistribution; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.IPartitionRelatedNode; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.PlanNode; +import org.apache.iotdb.db.queryengine.plan.planner.plan.node.PlanNodeId; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.PlanNodeType; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.source.AlignedSeriesAggregationScanNode; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.source.AlignedSeriesScanNode; @@ -39,6 +41,8 @@ import org.apache.tsfile.utils.ReadWriteIOUtils; import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Map; import java.util.Objects; import static com.google.common.base.Preconditions.checkArgument; @@ -107,8 +111,13 @@ public class PlanFragment { // In current version, one PlanFragment should contain at least one SourceNode, // and the DataRegions of all SourceNodes should be same in one PlanFragment. // So we can use the DataRegion of one SourceNode as the PlanFragment's DataRegion. - public TRegionReplicaSet getTargetRegion() { - return getNodeRegion(planNodeTree); + public TRegionReplicaSet getTargetRegionForTreeModel() { + return getNodeRegion(planNodeTree, Collections.emptyMap()); + } + + public TRegionReplicaSet getTargetRegionForTableModel( + final Map<PlanNodeId, NodeDistribution> nodeDistributionMap) { + return getNodeRegion(planNodeTree, nodeDistributionMap); } // If a Fragment is not related with DataPartition, @@ -118,12 +127,15 @@ public class PlanFragment { return getNodeLocation(planNodeTree); } - private TRegionReplicaSet getNodeRegion(PlanNode root) { - if (root instanceof IPartitionRelatedNode) { + private TRegionReplicaSet getNodeRegion( + PlanNode root, final Map<PlanNodeId, NodeDistribution> nodeDistributionMap) { + if (nodeDistributionMap.containsKey(root.getPlanNodeId())) { + return nodeDistributionMap.get(root.getPlanNodeId()).getRegion(); + } else if (root instanceof IPartitionRelatedNode) { return ((IPartitionRelatedNode) root).getRegionReplicaSet(); } for (PlanNode child : root.getChildren()) { - TRegionReplicaSet result = getNodeRegion(child); + TRegionReplicaSet result = getNodeRegion(child, nodeDistributionMap); if (result != null && result != DataPartition.NOT_ASSIGNED) { return result; } diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/AddExchangeNodes.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/AddExchangeNodes.java index 98b8a7c00d8..5d7278485f2 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/AddExchangeNodes.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/AddExchangeNodes.java @@ -19,7 +19,6 @@ package org.apache.iotdb.db.queryengine.plan.relational.planner.distribute; -import org.apache.iotdb.common.rpc.thrift.TRegionReplicaSet; import org.apache.iotdb.commons.partition.DataPartition; import org.apache.iotdb.db.queryengine.common.MPPQueryContext; import org.apache.iotdb.db.queryengine.plan.planner.distribution.NodeDistribution; @@ -74,20 +73,15 @@ public class AddExchangeNodes for (PlanNode child : node.getChildren()) { PlanNode rewriteNode = child.accept(this, context); - - TRegionReplicaSet region = - context.nodeDistributionMap.get(rewriteNode.getPlanNodeId()).getRegion(); - if (!region.equals(context.mostUsedRegion)) { - ExchangeNode exchangeNode = new ExchangeNode(queryContext.getQueryId().genPlanNodeId()); - exchangeNode.addChild(rewriteNode); - exchangeNode.setOutputSymbols(rewriteNode.getOutputSymbols()); - newNode.addChild(exchangeNode); - context.hasExchangeNode = true; - } else { - newNode.addChild(rewriteNode); - } + ExchangeNode exchangeNode = new ExchangeNode(queryContext.getQueryId().genPlanNodeId()); + exchangeNode.addChild(rewriteNode); + exchangeNode.setOutputSymbols(rewriteNode.getOutputSymbols()); + newNode.addChild(exchangeNode); + context.hasExchangeNode = true; + context.nodeDistributionMap.put( + exchangeNode.getPlanNodeId(), + new NodeDistribution(SAME_WITH_SOME_CHILD, context.mostUsedRegion)); } - context.nodeDistributionMap.put( node.getPlanNodeId(), new NodeDistribution(SAME_WITH_SOME_CHILD, context.mostUsedRegion)); diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableDistributedPlanner.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableDistributedPlanner.java index b537aaefdef..f1b903b3047 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableDistributedPlanner.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableDistributedPlanner.java @@ -24,12 +24,14 @@ import org.apache.iotdb.commons.utils.TestOnly; import org.apache.iotdb.db.queryengine.common.MPPQueryContext; import org.apache.iotdb.db.queryengine.execution.exchange.sink.DownStreamChannelLocation; import org.apache.iotdb.db.queryengine.plan.analyze.QueryType; +import org.apache.iotdb.db.queryengine.plan.planner.distribution.NodeDistribution; import org.apache.iotdb.db.queryengine.plan.planner.distribution.WriteFragmentParallelPlanner; import org.apache.iotdb.db.queryengine.plan.planner.plan.DistributedQueryPlan; import org.apache.iotdb.db.queryengine.plan.planner.plan.FragmentInstance; import org.apache.iotdb.db.queryengine.plan.planner.plan.LogicalQueryPlan; import org.apache.iotdb.db.queryengine.plan.planner.plan.SubPlan; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.PlanNode; +import org.apache.iotdb.db.queryengine.plan.planner.plan.node.PlanNodeId; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.WritePlanNode; import org.apache.iotdb.db.queryengine.plan.planner.plan.node.sink.IdentitySinkNode; import org.apache.iotdb.db.queryengine.plan.relational.analyzer.Analysis; @@ -109,7 +111,7 @@ public class TableDistributedPlanner { adjustUpStream(outputNodeWithExchange, planContext); - return generateDistributedPlan(outputNodeWithExchange); + return generateDistributedPlan(outputNodeWithExchange, planContext.nodeDistributionMap); } public PlanNode generateDistributedPlanWithOptimize( @@ -152,7 +154,9 @@ public class TableDistributedPlanner { return new AddExchangeNodes(mppQueryContext).addExchangeNodes(distributedPlan, planContext); } - private DistributedQueryPlan generateDistributedPlan(PlanNode outputNodeWithExchange) { + private DistributedQueryPlan generateDistributedPlan( + PlanNode outputNodeWithExchange, + final Map<PlanNodeId, NodeDistribution> nodeDistributionMap) { // generate subPlan SubPlan subPlan = new SubPlanGenerator() @@ -162,7 +166,9 @@ public class TableDistributedPlanner { // generate fragment instances List<FragmentInstance> fragmentInstances = mppQueryContext.getQueryType() == QueryType.READ - ? new TableModelQueryFragmentPlanner(subPlan, analysis, mppQueryContext).plan() + ? new TableModelQueryFragmentPlanner( + subPlan, analysis, mppQueryContext, nodeDistributionMap) + .plan() : new WriteFragmentParallelPlanner( subPlan, analysis, mppQueryContext, WritePlanNode::splitByPartition) .parallelPlan(); diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableModelQueryFragmentPlanner.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableModelQueryFragmentPlanner.java index e6344f33a2a..85fe530577e 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableModelQueryFragmentPlanner.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/relational/planner/distribute/TableModelQueryFragmentPlanner.java @@ -30,6 +30,7 @@ import org.apache.iotdb.db.queryengine.common.MPPQueryContext; import org.apache.iotdb.db.queryengine.common.PlanFragmentId; import org.apache.iotdb.db.queryengine.execution.exchange.sink.DownStreamChannelLocation; import org.apache.iotdb.db.queryengine.plan.analyze.QueryType; +import org.apache.iotdb.db.queryengine.plan.planner.distribution.NodeDistribution; import org.apache.iotdb.db.queryengine.plan.planner.plan.FragmentInstance; import org.apache.iotdb.db.queryengine.plan.planner.plan.PlanFragment; import org.apache.iotdb.db.queryengine.plan.planner.plan.SubPlan; @@ -74,10 +75,17 @@ public class TableModelQueryFragmentPlanner { // Record FragmentInstances dispatched to same DataNode private final Map<TDataNodeLocation, List<FragmentInstance>> dataNodeFIMap = new HashMap<>(); - TableModelQueryFragmentPlanner(SubPlan subPlan, Analysis analysis, MPPQueryContext queryContext) { + private final Map<PlanNodeId, NodeDistribution> nodeDistributionMap; + + TableModelQueryFragmentPlanner( + SubPlan subPlan, + Analysis analysis, + MPPQueryContext queryContext, + final Map<PlanNodeId, NodeDistribution> nodeDistributionMap) { this.subPlan = subPlan; this.analysis = analysis; this.queryContext = queryContext; + this.nodeDistributionMap = nodeDistributionMap; } public List<FragmentInstance> plan() { @@ -89,7 +97,7 @@ public class TableModelQueryFragmentPlanner { private void prepare() { for (PlanFragment fragment : subPlan.getPlanFragmentList()) { recordPlanNodeRelation(fragment.getPlanNodeTree(), fragment.getId()); - produceFragmentInstance(fragment); + produceFragmentInstance(fragment, nodeDistributionMap); } fragmentInstanceList.forEach( @@ -101,7 +109,8 @@ public class TableModelQueryFragmentPlanner { root.getChildren().forEach(child -> recordPlanNodeRelation(child, planFragmentId)); } - private void produceFragmentInstance(PlanFragment fragment) { + private void produceFragmentInstance( + PlanFragment fragment, final Map<PlanNodeId, NodeDistribution> nodeDistributionMap) { FragmentInstance fragmentInstance = new FragmentInstance( fragment, @@ -114,7 +123,7 @@ public class TableModelQueryFragmentPlanner { // Get the target region for origin PlanFragment, then its instance will be distributed one // of them. - TRegionReplicaSet regionReplicaSet = fragment.getTargetRegion(); + TRegionReplicaSet regionReplicaSet = fragment.getTargetRegionForTableModel(nodeDistributionMap); // Set ExecutorType and target host for the instance, // We need to store all the replica host in case of the scenario that the instance need to be
