Github user tillrohrmann commented on a diff in the pull request: https://github.com/apache/flink/pull/2744#discussion_r90622537 --- Diff: flink-yarn/src/main/java/org/apache/flink/yarn/YarnFlinkApplicationMasterRunner.java --- @@ -0,0 +1,316 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.yarn; + +import akka.actor.ActorSystem; +import org.apache.flink.api.common.JobExecutionResult; +import org.apache.flink.api.common.time.Time; +import org.apache.flink.configuration.ConfigConstants; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.akka.AkkaUtils; +import org.apache.flink.runtime.client.JobExecutionException; +import org.apache.flink.runtime.clusterframework.ApplicationStatus; +import org.apache.flink.runtime.clusterframework.BootstrapTools; +import org.apache.flink.runtime.highavailability.HighAvailabilityServices; +import org.apache.flink.runtime.highavailability.HighAvailabilityServicesUtils; +import org.apache.flink.runtime.jobgraph.JobGraph; +import org.apache.flink.runtime.jobmanager.OnCompletionActions; +import org.apache.flink.runtime.jobmaster.JobManagerRunner; +import org.apache.flink.runtime.metrics.MetricRegistry; +import org.apache.flink.runtime.metrics.MetricRegistryConfiguration; +import org.apache.flink.runtime.resourcemanager.JobLeaderIdService; +import org.apache.flink.runtime.resourcemanager.ResourceManager; +import org.apache.flink.runtime.resourcemanager.ResourceManagerConfiguration; +import org.apache.flink.runtime.resourcemanager.exceptions.ConfigurationException; +import org.apache.flink.runtime.resourcemanager.slotmanager.DefaultSlotManager; +import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerFactory; +import org.apache.flink.runtime.rpc.FatalErrorHandler; +import org.apache.flink.runtime.rpc.RpcService; +import org.apache.flink.runtime.rpc.akka.AkkaRpcService; +import org.apache.flink.runtime.util.EnvironmentInformation; +import org.apache.flink.runtime.util.JvmShutdownSafeguard; +import org.apache.flink.runtime.util.SignalHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.concurrent.duration.FiniteDuration; + +import javax.annotation.concurrent.GuardedBy; + +import java.io.File; +import java.io.FileInputStream; +import java.io.ObjectInputStream; + +/** + * This class is the executable entry point for the YARN application master. + * It starts actor system and the actors for {@link org.apache.flink.runtime.jobmaster.JobManagerRunner} + * and {@link org.apache.flink.yarn.YarnResourceManager}. + * + * The JobMasnagerRunner start a {@link org.apache.flink.runtime.jobmaster.JobMaster} + * JobMaster handles Flink job execution, while the YarnResourceManager handles container + * allocation and failure detection. + */ +public class YarnFlinkApplicationMasterRunner extends AbstractYarnFlinkApplicationMasterRunner + implements OnCompletionActions, FatalErrorHandler { + + /** Logger */ + protected static final Logger LOG = LoggerFactory.getLogger(YarnFlinkApplicationMasterRunner.class); + + /** The job graph file path */ + private static final String JOB_GRAPH_FILE_PATH = "flink.jobgraph.path"; + + /** The lock to guard startup / shutdown / manipulation methods */ + private final Object lock = new Object(); + + @GuardedBy("lock") + private MetricRegistry metricRegistry; + + @GuardedBy("lock") + private HighAvailabilityServices haServices; + + @GuardedBy("lock") + private RpcService commonRpcService; + + @GuardedBy("lock") + private ResourceManager resourceManager; + + @GuardedBy("lock") + private JobManagerRunner jobManagerRunner; + + @GuardedBy("lock") + private JobGraph jobGraph; + + // ------------------------------------------------------------------------ + // Program entry point + // ------------------------------------------------------------------------ + + /** + * The entry point for the YARN application master. + * + * @param args The command line arguments. + */ + public static void main(String[] args) { + EnvironmentInformation.logEnvironmentInfo(LOG, "YARN ApplicationMaster runner", args); + SignalHandler.register(LOG); + JvmShutdownSafeguard.installAsShutdownHook(LOG); + + // run and exit with the proper return code + int returnCode = new YarnFlinkApplicationMasterRunner().run(args); + System.exit(returnCode); + } + + @Override + protected int runApplicationMaster(Configuration config) { + + try { + // ---- (1) create common services + + // try to start the rpc service + // using the port range definition from the config. + final String amPortRange = config.getString( + ConfigConstants.YARN_APPLICATION_MASTER_PORT, + ConfigConstants.DEFAULT_YARN_JOB_MANAGER_PORT); + + synchronized (lock) { + haServices = HighAvailabilityServicesUtils.createAvailableOrEmbeddedServices(config); + metricRegistry = new MetricRegistry(MetricRegistryConfiguration.fromConfiguration(config)); + commonRpcService = createRpcService(config, appMasterHostname, amPortRange); + + // ---- (2) init resource manager ------- + resourceManager = createResourceManager(config); + + // ---- (3) init job master parameters + jobManagerRunner = createJobManagerRunner(config); + + // ---- (4) start the resource manager and job manager runner: + resourceManager.start(); + LOG.debug("YARN Flink Resource Manager started"); + + jobManagerRunner.start(); + LOG.debug("Job Manager Runner started"); + + // ---- (5) start the web monitor + // TODO: add web monitor + } + + // wait for resource manager to finish + resourceManager.getTerminationFuture().get(); + // everything started, we can wait until all is done or the process is killed + LOG.info("YARN Application Master finished"); + } + catch (Throwable t) { + // make sure that everything whatever ends up in the log + LOG.error("YARN Application Master initialization failed", t); + shutdown(ApplicationStatus.FAILED, t.getMessage()); + return INIT_ERROR_EXIT_CODE; + } + + return 0; + } + + // ------------------------------------------------------------------------ + // Utilities + // ------------------------------------------------------------------------ + + protected RpcService createRpcService( + Configuration configuration, + String bindAddress, + String portRange) throws Exception{ + ActorSystem actorSystem = BootstrapTools.startActorSystem(configuration, bindAddress, portRange, LOG); + FiniteDuration duration = AkkaUtils.getTimeout(configuration); + return new AkkaRpcService(actorSystem, Time.of(duration.length(), duration.unit())); --- End diff -- Maybe you could extend `RcpServiceUtils.createRpcService` to also support port ranges.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---