GJL closed pull request #6775: [FLINK-5542] use YarnCluster vcores setting to do MaxVCore validation
URL: https://github.com/apache/flink/pull/6775
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java b/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java
index c3ad9f7f42c..31aaadfbfb8 100644
--- a/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java
+++ b/flink-yarn/src/main/java/org/apache/flink/yarn/AbstractYarnClusterDescriptor.java
@@ -282,18 +282,29 @@ private void isReadyForDeployment(ClusterSpecification clusterSpecification) thr
 		}
 
 		// Check if we don't exceed YARN's maximum virtual cores.
-		// The number of cores can be configured in the config.
-		// If not configured, it is set to the number of task slots
-		int numYarnVcores = yarnConfiguration.getInt(YarnConfiguration.NM_VCORES, YarnConfiguration.DEFAULT_NM_VCORES);
+		// Fetch numYarnMaxVcores from all the RUNNING nodes via yarnClient
+		int numYarnMaxVcores = Integer.MIN_VALUE;
+		try {
+			List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);
+			for (NodeReport rep : nodes) {
+				final Resource res = rep.getCapability();
+				if (res.getVirtualCores() > numYarnMaxVcores) {
+					numYarnMaxVcores = res.getVirtualCores();
+				}
+			}
+		} catch (Exception e) {
+			throw new YarnDeploymentException("Couldn't get cluster description, please check on the YarnConfiguration", e);
+		}
+
 		int configuredVcores = flinkConfiguration.getInteger(YarnConfigOptions.VCORES, clusterSpecification.getSlotsPerTaskManager());
 		// don't configure more than the maximum configured number of vcores
-		if (configuredVcores > numYarnVcores) {
+		if (configuredVcores > numYarnMaxVcores) {
 			throw new IllegalConfigurationException(
-				String.format("The number of virtual cores per node were configured with %d" +
-					" but Yarn only has %d virtual cores available. Please note that the number" +
-					" of virtual cores is set to the number of task slots by default unless configured" +
-					" in the Flink config with '%s.'",
-					configuredVcores, numYarnVcores, YarnConfigOptions.VCORES.key()));
+				String.format("The number of requested virtual cores per node %d" +
+					" exceeds the maximum number virtual cores %d available in the Yarn Cluster." +
+					" Please note that the number of virtual cores is set to the number of task slots by default" +
+					" unless configured in the Flink config with '%s.'",
+					configuredVcores, numYarnMaxVcores, YarnConfigOptions.VCORES.key()));
 		}
 
 		// check if required Hadoop environment variables are set. If not, warn user

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services