[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-03-02 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r586086067



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManagerBuildUtil.java
##
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.DistributionPattern;
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Utilities for building {@link EdgeManager}. */
+public class EdgeManagerBuildUtil {
+
+public static void connectVertexToResult(
+ExecutionJobVertex vertex,
+IntermediateResult ires,
+int inputNumber,
+DistributionPattern distributionPattern) {
+
+switch (distributionPattern) {
+case POINTWISE:
+connectPointwise(vertex.getTaskVertices(), ires, inputNumber);
+break;
+case ALL_TO_ALL:
+connectAllToAll(vertex.getTaskVertices(), ires, inputNumber);
+break;
+default:
+throw new RuntimeException("Unrecognized distribution 
pattern.");
+}
+}
+
+private static void connectAllToAll(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+ConsumedPartitionGroup consumedPartitions =
+new ConsumedPartitionGroup(
+Arrays.stream(ires.getPartitions())
+
.map(IntermediateResultPartition::getPartitionId)
+.collect(Collectors.toList()));
+for (ExecutionVertex ev : taskVertices) {
+ev.addConsumedPartitions(consumedPartitions, inputNumber);
+}
+
+ConsumerVertexGroup vertices =
+new ConsumerVertexGroup(
+Arrays.stream(taskVertices)
+.map(ExecutionVertex::getID)
+.collect(Collectors.toList()));
+for (IntermediateResultPartition partition : ires.getPartitions()) {
+partition.addConsumers(vertices);
+}
+}
+
+private static void connectPointwise(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+final int sourceCount = ires.getPartitions().length;
+final int targetCount = taskVertices.length;
+
+if (sourceCount == targetCount) {
+for (int i = 0; i < sourceCount; i++) {
+ExecutionVertex executionVertex = taskVertices[i];
+IntermediateResultPartition partition = 
ires.getPartitions()[i];
+
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+partition.addConsumers(consumerVertexGroup);
+
+ConsumedPartitionGroup consumedPartitionGroup =
+new ConsumedPartitionGroup(partition.getPartitionId());
+executionVertex.addConsumedPartitions(consumedPartitionGroup, 
inputNumber);
+}
+} else if (sourceCount > targetCount) {
+for (int index = 0; index < targetCount; index++) {
+
+ExecutionVertex executionVertex = taskVertices[index];
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+
+int start = index * sourceCount / targetCount;
+int end = (index + 1) * sourceCount / targetCount;
+
+List consumedPartitions =
+new ArrayList<>(end - start);
+
+for (int i = start; i < end; i++) {
+IntermediateResultPartition partition = 
ires.getPartitions()

[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-03-02 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r584430325



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManager.java
##
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.flink.util.Preconditions.checkState;
+
+/** Class that manages all the connections between tasks. */
+public class EdgeManager {
+
+private final Map> partitionConsumers =
+new HashMap<>();
+
+private final Map> 
vertexConsumedPartitions =
+new HashMap<>();
+
+public void addPartitionConsumers(
+IntermediateResultPartitionID resultPartitionId, 
ConsumerVertexGroup consumerVertices) {
+
+checkState(!partitionConsumers.containsKey(resultPartitionId));
+
+final List consumers = 
getPartitionConsumers(resultPartitionId);
+
+// sanity check
+checkState(
+consumers.size() == 0,
+"Currently there has to be exactly one consumer in real jobs");
+
+consumers.add(consumerVertices);
+}
+
+public void addVertexConsumedPartitions(
+ExecutionVertexID executionVertexId,
+ConsumedPartitionGroup partitions,
+int inputNumber) {
+
+final List consumedPartitions =
+getVertexConsumedPartitions(executionVertexId);
+
+// sanity check
+checkState(consumedPartitions.size() == inputNumber);

Review comment:
   Yes, this ordering is redundant; there was no constraint on the order 
before. I prefer to remove `inputNumber` from the parameters, since currently in 
`EdgeManagerBuildUtils` the ConsumedPartitionGroup is added one-by-one per JobEdge.

##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManager.java
##
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.flink.util.Preconditions.checkState;
+
+/** Class that manages all the connections between tasks. */
+public class EdgeManager {
+
+private final Map> partitionConsumers =
+new HashMap<>();
+
+private final Map> 
vertexConsumedPartitions =
+new HashMap<>();
+
+public void addPartitionConsumers(
+IntermediateResultPartitionID resultPartitionId, 
ConsumerVertexGroup consumerVertices) {
+
+checkState(!partitionConsumers.containsKey(resultPartitionId));
+
+final List consumers = 
getPartiti

[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-25 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r582790425



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManagerBuildUtil.java
##
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.DistributionPattern;
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Utilities for building {@link EdgeManager}. */
+public class EdgeManagerBuildUtil {
+
+public static void registerToExecutionEdgeManager(
+ExecutionVertex[] taskVertices,
+IntermediateResult ires,
+int inputNumber,
+DistributionPattern distributionPattern) {
+
+switch (distributionPattern) {
+case POINTWISE:
+connectPointwise(taskVertices, ires, inputNumber);
+break;
+case ALL_TO_ALL:
+connectAllToAll(taskVertices, ires, inputNumber);
+break;
+default:
+throw new RuntimeException("Unrecognized distribution 
pattern.");
+}
+}
+
+private static void connectAllToAll(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+ConsumedPartitionGroup consumedPartitions =
+new ConsumedPartitionGroup(
+Arrays.stream(ires.getPartitions())
+
.map(IntermediateResultPartition::getPartitionId)
+.collect(Collectors.toList()));
+for (ExecutionVertex ev : taskVertices) {
+ev.setConsumedPartitions(consumedPartitions, inputNumber);
+}
+
+ConsumerVertexGroup vertices =
+new ConsumerVertexGroup(
+Arrays.stream(taskVertices)
+.map(ExecutionVertex::getID)
+.collect(Collectors.toList()));
+for (IntermediateResultPartition partition : ires.getPartitions()) {
+partition.setConsumers(vertices);
+}
+}
+
+private static void connectPointwise(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+final int sourceCount = ires.getPartitions().length;
+final int targetCount = taskVertices.length;
+
+if (sourceCount == targetCount) {
+for (int i = 0; i < sourceCount; i++) {
+ExecutionVertex executionVertex = taskVertices[i];
+IntermediateResultPartition partition = 
ires.getPartitions()[i];
+
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+partition.setConsumers(consumerVertexGroup);
+
+ConsumedPartitionGroup consumedPartitionGroup =
+new ConsumedPartitionGroup(partition.getPartitionId());
+executionVertex.setConsumedPartitions(consumedPartitionGroup, 
inputNumber);
+}
+} else if (sourceCount > targetCount) {
+for (int index = 0; index < targetCount; index++) {
+
+ExecutionVertex executionVertex = taskVertices[index];
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+
+List consumedPartitions =
+new ArrayList<>(sourceCount / targetCount + 1);
+
+if (sourceCount % targetCount == 0) {

Review comment:
   @zhuzhurk Thanks for the suggestion. `PointwisePatternTest` has been 
improved.





This is an automated message from

[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-24 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r582556815



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManagerBuildUtil.java
##
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.DistributionPattern;
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Utilities for building {@link EdgeManager}. */
+public class EdgeManagerBuildUtil {
+
+public static void registerToExecutionEdgeManager(
+ExecutionVertex[] taskVertices,
+IntermediateResult ires,
+int inputNumber,
+DistributionPattern distributionPattern) {
+
+switch (distributionPattern) {
+case POINTWISE:
+connectPointwise(taskVertices, ires, inputNumber);
+break;
+case ALL_TO_ALL:
+connectAllToAll(taskVertices, ires, inputNumber);
+break;
+default:
+throw new RuntimeException("Unrecognized distribution 
pattern.");
+}
+}
+
+private static void connectAllToAll(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+ConsumedPartitionGroup consumedPartitions =
+new ConsumedPartitionGroup(
+Arrays.stream(ires.getPartitions())
+
.map(IntermediateResultPartition::getPartitionId)
+.collect(Collectors.toList()));
+for (ExecutionVertex ev : taskVertices) {
+ev.setConsumedPartitions(consumedPartitions, inputNumber);
+}
+
+ConsumerVertexGroup vertices =
+new ConsumerVertexGroup(
+Arrays.stream(taskVertices)
+.map(ExecutionVertex::getID)
+.collect(Collectors.toList()));
+for (IntermediateResultPartition partition : ires.getPartitions()) {
+partition.setConsumers(vertices);
+}
+}
+
+private static void connectPointwise(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+final int sourceCount = ires.getPartitions().length;
+final int targetCount = taskVertices.length;
+
+if (sourceCount == targetCount) {
+for (int i = 0; i < sourceCount; i++) {
+ExecutionVertex executionVertex = taskVertices[i];
+IntermediateResultPartition partition = 
ires.getPartitions()[i];
+
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+partition.setConsumers(consumerVertexGroup);
+
+ConsumedPartitionGroup consumedPartitionGroup =
+new ConsumedPartitionGroup(partition.getPartitionId());
+executionVertex.setConsumedPartitions(consumedPartitionGroup, 
inputNumber);
+}
+} else if (sourceCount > targetCount) {
+for (int index = 0; index < targetCount; index++) {
+
+ExecutionVertex executionVertex = taskVertices[index];
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+
+List consumedPartitions =
+new ArrayList<>(sourceCount / targetCount + 1);
+
+if (sourceCount % targetCount == 0) {

Review comment:
   Thanks for the suggestion. I've improved the `PointwisePatternTest` and 
the logic in `EdgeManagerBuildUtil`.





Thi

[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-23 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r581676700



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManagerBuildUtil.java
##
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.DistributionPattern;
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Utilities for building {@link EdgeManager}. */
+public class EdgeManagerBuildUtil {
+
+public static void registerToExecutionEdgeManager(
+ExecutionVertex[] taskVertices,
+IntermediateResult ires,
+int inputNumber,
+DistributionPattern distributionPattern) {
+
+switch (distributionPattern) {
+case POINTWISE:
+connectPointwise(taskVertices, ires, inputNumber);
+break;
+case ALL_TO_ALL:
+connectAllToAll(taskVertices, ires, inputNumber);
+break;
+default:
+throw new RuntimeException("Unrecognized distribution 
pattern.");
+}
+}
+
+private static void connectAllToAll(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+ConsumedPartitionGroup consumedPartitions =
+new ConsumedPartitionGroup(
+Arrays.stream(ires.getPartitions())
+
.map(IntermediateResultPartition::getPartitionId)
+.collect(Collectors.toList()));
+for (ExecutionVertex ev : taskVertices) {
+ev.setConsumedPartitions(consumedPartitions, inputNumber);
+}
+
+ConsumerVertexGroup vertices =
+new ConsumerVertexGroup(
+Arrays.stream(taskVertices)
+.map(ExecutionVertex::getID)
+.collect(Collectors.toList()));
+for (IntermediateResultPartition partition : ires.getPartitions()) {
+partition.setConsumers(vertices);
+}
+}
+
+private static void connectPointwise(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+final int sourceCount = ires.getPartitions().length;
+final int targetCount = taskVertices.length;
+
+if (sourceCount == targetCount) {
+for (int i = 0; i < sourceCount; i++) {
+ExecutionVertex executionVertex = taskVertices[i];
+IntermediateResultPartition partition = 
ires.getPartitions()[i];
+
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+partition.setConsumers(consumerVertexGroup);
+
+ConsumedPartitionGroup consumedPartitionGroup =
+new ConsumedPartitionGroup(partition.getPartitionId());
+executionVertex.setConsumedPartitions(consumedPartitionGroup, 
inputNumber);
+}
+} else if (sourceCount > targetCount) {
+for (int index = 0; index < targetCount; index++) {
+
+ExecutionVertex executionVertex = taskVertices[index];
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+
+List consumedPartitions =
+new ArrayList<>(sourceCount / targetCount + 1);
+
+if (sourceCount % targetCount == 0) {

Review comment:
   Totally agreed. Thanks for providing a better solution. I've validated it 
with several common cases. I'm wondering whether we could simplify the situation 
`sourceCount > targetCount` i

[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-23 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r581615458



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManagerBuildUtil.java
##
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.DistributionPattern;
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Utilities for building {@link EdgeManager}. */
+public class EdgeManagerBuildUtil {
+
+public static void registerToExecutionEdgeManager(
+ExecutionVertex[] taskVertices,
+IntermediateResult ires,
+int inputNumber,
+DistributionPattern distributionPattern) {
+
+switch (distributionPattern) {
+case POINTWISE:
+connectPointwise(taskVertices, ires, inputNumber);
+break;
+case ALL_TO_ALL:
+connectAllToAll(taskVertices, ires, inputNumber);
+break;
+default:
+throw new RuntimeException("Unrecognized distribution 
pattern.");
+}
+}
+
+private static void connectAllToAll(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+ConsumedPartitionGroup consumedPartitions =
+new ConsumedPartitionGroup(
+Arrays.stream(ires.getPartitions())
+
.map(IntermediateResultPartition::getPartitionId)
+.collect(Collectors.toList()));
+for (ExecutionVertex ev : taskVertices) {
+ev.setConsumedPartitions(consumedPartitions, inputNumber);
+}
+
+ConsumerVertexGroup vertices =
+new ConsumerVertexGroup(
+Arrays.stream(taskVertices)
+.map(ExecutionVertex::getID)
+.collect(Collectors.toList()));
+for (IntermediateResultPartition partition : ires.getPartitions()) {
+partition.setConsumers(vertices);
+}
+}
+
+private static void connectPointwise(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+final int sourceCount = ires.getPartitions().length;
+final int targetCount = taskVertices.length;
+
+if (sourceCount == targetCount) {
+for (int i = 0; i < sourceCount; i++) {
+ExecutionVertex executionVertex = taskVertices[i];
+IntermediateResultPartition partition = 
ires.getPartitions()[i];
+
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+partition.setConsumers(consumerVertexGroup);
+
+ConsumedPartitionGroup consumedPartitionGroup =
+new ConsumedPartitionGroup(partition.getPartitionId());
+executionVertex.setConsumedPartitions(consumedPartitionGroup, 
inputNumber);
+}
+} else if (sourceCount > targetCount) {
+for (int index = 0; index < targetCount; index++) {
+
+ExecutionVertex executionVertex = taskVertices[index];
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+
+List consumedPartitions =
+new ArrayList<>(sourceCount / targetCount + 1);
+
+if (sourceCount % targetCount == 0) {

Review comment:
   I'm wondering whether we could replace the logic of `sourceCount < targetCount` 
in this way?





This is an automated messag

[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-23 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r581615048



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManagerBuildUtil.java
##
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.DistributionPattern;
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Utilities for building {@link EdgeManager}. */
+public class EdgeManagerBuildUtil {
+
+public static void registerToExecutionEdgeManager(
+ExecutionVertex[] taskVertices,
+IntermediateResult ires,
+int inputNumber,
+DistributionPattern distributionPattern) {
+
+switch (distributionPattern) {
+case POINTWISE:
+connectPointwise(taskVertices, ires, inputNumber);
+break;
+case ALL_TO_ALL:
+connectAllToAll(taskVertices, ires, inputNumber);
+break;
+default:
+throw new RuntimeException("Unrecognized distribution 
pattern.");
+}
+}
+
+private static void connectAllToAll(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+ConsumedPartitionGroup consumedPartitions =
+new ConsumedPartitionGroup(
+Arrays.stream(ires.getPartitions())
+
.map(IntermediateResultPartition::getPartitionId)
+.collect(Collectors.toList()));
+for (ExecutionVertex ev : taskVertices) {
+ev.setConsumedPartitions(consumedPartitions, inputNumber);
+}
+
+ConsumerVertexGroup vertices =
+new ConsumerVertexGroup(
+Arrays.stream(taskVertices)
+.map(ExecutionVertex::getID)
+.collect(Collectors.toList()));
+for (IntermediateResultPartition partition : ires.getPartitions()) {
+partition.setConsumers(vertices);
+}
+}
+
+private static void connectPointwise(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+final int sourceCount = ires.getPartitions().length;
+final int targetCount = taskVertices.length;
+
+if (sourceCount == targetCount) {
+for (int i = 0; i < sourceCount; i++) {
+ExecutionVertex executionVertex = taskVertices[i];
+IntermediateResultPartition partition = 
ires.getPartitions()[i];
+
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+partition.setConsumers(consumerVertexGroup);
+
+ConsumedPartitionGroup consumedPartitionGroup =
+new ConsumedPartitionGroup(partition.getPartitionId());
+executionVertex.setConsumedPartitions(consumedPartitionGroup, 
inputNumber);
+}
+} else if (sourceCount > targetCount) {
+for (int index = 0; index < targetCount; index++) {
+
+ExecutionVertex executionVertex = taskVertices[index];
+ConsumerVertexGroup consumerVertexGroup =
+new ConsumerVertexGroup(executionVertex.getID());
+
+List consumedPartitions =
+new ArrayList<>(sourceCount / targetCount + 1);
+
+if (sourceCount % targetCount == 0) {

Review comment:
   Thank you for providing a better solution. I've replaced this part and 
validated it with several common cases.





This is an

[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-23 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r581008912



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/jobgraph/DistributionPattern.java
##
@@ -18,29 +18,15 @@
 
 package org.apache.flink.runtime.jobgraph;
 
-import org.apache.flink.runtime.executiongraph.ExecutionVertex;
-
 /**
  * A distribution pattern determines, which sub tasks of a producing task are 
connected to which
  * consuming sub tasks.

Review comment:
   Added. I'm wondering whether it would be better to say "how ExecutionVertex and 
IntermediateResultPartition are connected in {@link EdgeManagerBuildUtil}"?

##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/strategy/ConsumedPartitionGroup.java
##
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.scheduler.strategy;
+
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+
+import java.util.Collections;
+import java.util.List;
+
+/** Group of consumed {@link IntermediateResultPartitionID}s. */
+public class ConsumedPartitionGroup {
+private final List resultPartitions;
+
+public ConsumedPartitionGroup(List 
resultPartitions) {
+this.resultPartitions = resultPartitions;
+}
+
+public ConsumedPartitionGroup(IntermediateResultPartitionID 
resultPartition) {
+this(Collections.singletonList(resultPartition));
+}
+
+public List getResultPartitions() {
+return resultPartitions;

Review comment:
   Agreed. Done.

##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/strategy/ConsumerVertexGroup.java
##
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.scheduler.strategy;
+
+import java.util.Collections;
+import java.util.List;
+
+/** Group of consumer {@link ExecutionVertexID}s. */
+public class ConsumerVertexGroup {
+private final List vertices;
+
+public ConsumerVertexGroup(List vertices) {
+this.vertices = vertices;
+}
+
+public ConsumerVertexGroup(ExecutionVertexID vertex) {
+this(Collections.singletonList(vertex));
+}
+
+public List getVertices() {
+return vertices;

Review comment:
   Done.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-23 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r581003377



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/IntermediateResultPartition.java
##
@@ -90,21 +89,12 @@ void resetForNewExecution() {
 hasDataProduced = false;
 }
 
-int addConsumerGroup() {
-int pos = consumers.size();
-
-// NOTE: currently we support only one consumer per result!!!
-if (pos != 0) {
-throw new RuntimeException(
-"Currently, each intermediate result can only have one 
consumer.");
-}
-
-consumers.add(new ArrayList());
-return pos;
+public void setConsumers(ConsumerVertexGroup consumers) {
+
producer.getExecutionGraph().getEdgeManager().addPartitionConsumers(partitionId,
 consumers);

Review comment:
   Sorry for being careless. Done.

##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/IntermediateResultPartition.java
##
@@ -90,21 +89,12 @@ void resetForNewExecution() {
 hasDataProduced = false;
 }
 
-int addConsumerGroup() {
-int pos = consumers.size();
-
-// NOTE: currently we support only one consumer per result!!!
-if (pos != 0) {
-throw new RuntimeException(
-"Currently, each intermediate result can only have one 
consumer.");
-}
-
-consumers.add(new ArrayList());
-return pos;
+public void setConsumers(ConsumerVertexGroup consumers) {
+
producer.getExecutionGraph().getEdgeManager().addPartitionConsumers(partitionId,
 consumers);
 }
 
-void addConsumer(ExecutionEdge edge, int consumerNumber) {
-consumers.get(consumerNumber).add(edge);
+EdgeManager getEdgeManager() {

Review comment:
   Yes, it should be private. Thanks for pointing this out.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [flink] Thesharing commented on a change in pull request #14868: [FLINK-21326][runtime] Optimize building topology when initializing ExecutionGraph

2021-02-23 Thread GitBox


Thesharing commented on a change in pull request #14868:
URL: https://github.com/apache/flink/pull/14868#discussion_r581001291



##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/IntermediateResultPartition.java
##
@@ -32,25 +32,24 @@
 
 private final IntermediateResultPartitionID partitionId;
 
-private List> consumers;
-
 /** Whether this partition has produced some data. */
 private boolean hasDataProduced = false;
 
 public IntermediateResultPartition(
 IntermediateResult totalResult, ExecutionVertex producer, int 
partitionNumber) {
 this.totalResult = totalResult;
 this.producer = producer;
-this.consumers = new ArrayList>(0);
 this.partitionId = new 
IntermediateResultPartitionID(totalResult.getId(), partitionNumber);
+
+producer.getExecutionGraph().registerResultPartition(partitionId, 
this);
 }
 
 public ExecutionVertex getProducer() {
 return producer;
 }
 
 public int getPartitionNumber() {
-return partitionId.getPartitionNum();
+return partitionId.getPartitionNumber();

Review comment:
   Yes, I'll rearrange it once the PR is ready.

##
File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/EdgeManagerBuildUtil.java
##
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package org.apache.flink.runtime.executiongraph;
+
+import org.apache.flink.runtime.jobgraph.DistributionPattern;
+import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;
+import org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup;
+import org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup;
+import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/** Utilities for building {@link EdgeManager}. */
+public class EdgeManagerBuildUtil {
+
+public static void connectVertexToResult(
+ExecutionVertex[] taskVertices,
+IntermediateResult ires,
+int inputNumber,
+DistributionPattern distributionPattern) {
+
+switch (distributionPattern) {
+case POINTWISE:
+connectPointwise(taskVertices, ires, inputNumber);
+break;
+case ALL_TO_ALL:
+connectAllToAll(taskVertices, ires, inputNumber);
+break;
+default:
+throw new RuntimeException("Unrecognized distribution 
pattern.");
+}
+}
+
+private static void connectAllToAll(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+ConsumedPartitionGroup consumedPartitions =
+new ConsumedPartitionGroup(
+Arrays.stream(ires.getPartitions())
+
.map(IntermediateResultPartition::getPartitionId)
+.collect(Collectors.toList()));
+for (ExecutionVertex ev : taskVertices) {
+ev.addConsumedPartitions(consumedPartitions, inputNumber);
+}
+
+ConsumerVertexGroup vertices =
+new ConsumerVertexGroup(
+Arrays.stream(taskVertices)
+.map(ExecutionVertex::getID)
+.collect(Collectors.toList()));
+for (IntermediateResultPartition partition : ires.getPartitions()) {
+partition.addConsumers(vertices);
+}
+}
+
+private static void connectPointwise(
+ExecutionVertex[] taskVertices, IntermediateResult ires, int 
inputNumber) {
+
+final int sourceCount = ires.getPartitions().length;
+final int targetCount = taskVertices.length;
+
+if (sourceCount == targetCount) {
+for (int i = 0; i < sourceCount; i++) {
+ExecutionVertex executionVertex = taskVertices[i];
+IntermediateResultPartition partition = 
ires.getPartitions()[i];
+
+ConsumerVertexGroup consumerVertexGroup =
+