YARN-4254. ApplicationAttempt stuck for ever due to UnknownHostException. Contributed by Bibin A Chundatt
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9bb2801e Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9bb2801e Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9bb2801e Branch: refs/heads/HDFS-12943 Commit: 9bb2801e8ce1e6298241944a65f593f555ae10e2 Parents: c968365 Author: Jason Lowe <jl...@apache.org> Authored: Fri Oct 5 15:52:46 2018 -0500 Committer: Jason Lowe <jl...@apache.org> Committed: Fri Oct 5 15:52:46 2018 -0500 ---------------------------------------------------------------------- .../hadoop/yarn/conf/YarnConfiguration.java | 9 +++- .../src/main/resources/yarn-default.xml | 5 +++ .../resourcemanager/ResourceTrackerService.java | 23 ++++++++++ .../TestResourceTrackerService.java | 45 ++++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/9bb2801e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 95861d7..6488ebf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -541,7 +541,14 @@ public class YarnConfiguration extends Configuration { public static final String RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = RM_PREFIX + "resource-tracker.client.thread-count"; public static final int DEFAULT_RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = 50; - + + /** Check IP and hostname resolution during nodemanager registration.*/ + public static final String RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY = + RM_PREFIX + "resource-tracker.nm.ip-hostname-check"; + + public static final boolean DEFAULT_RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY = + false; + /** The class to use as the resource scheduler.*/ public static final String RM_SCHEDULER = RM_PREFIX + "scheduler.class"; http://git-wip-us.apache.org/repos/asf/hadoop/blob/9bb2801e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index e6f7b37..8e9f15b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -265,6 +265,11 @@ </property> <property> + <name>yarn.resourcemanager.resource-tracker.nm.ip-hostname-check</name> + <value>false</value> + </property> + + <property> <description>Are acls enabled.</description> <name>yarn.acl.enable</name> <value>false</value> http://git-wip-us.apache.org/repos/asf/hadoop/blob/9bb2801e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index b67172e..3d6eda2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.io.IOException; import java.io.InputStream; +import java.net.InetAddress; import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.Arrays; @@ -39,6 +40,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.Node; import org.apache.hadoop.security.authorize.PolicyProvider; import org.apache.hadoop.service.AbstractService; @@ -126,6 +128,7 @@ public class ResourceTrackerService extends AbstractService implements private DynamicResourceConfiguration drConf; private final AtomicLong timelineCollectorVersion = new AtomicLong(0); + private boolean checkIpHostnameInRegistration; public ResourceTrackerService(RMContext rmContext, NodesListManager nodesListManager, @@ -162,6 +165,9 @@ public class ResourceTrackerService extends AbstractService implements + " should be larger than 0."); } + checkIpHostnameInRegistration = conf.getBoolean( + YarnConfiguration.RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + YarnConfiguration.DEFAULT_RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY); minAllocMb = conf.getInt( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); @@ -350,6 +356,23 @@ public class ResourceTrackerService extends AbstractService implements } } + if (checkIpHostnameInRegistration) { + InetSocketAddress nmAddress = + NetUtils.createSocketAddrForHost(host, cmPort); + InetAddress inetAddress = Server.getRemoteIp(); + if (inetAddress != null && nmAddress.isUnresolved()) { + // Reject registration of unresolved nm to prevent resourcemanager + // getting stuck at allocations. + final String message = + "hostname cannot be resolved (ip=" + inetAddress.getHostAddress() + + ", hostname=" + host + ")"; + LOG.warn("Unresolved nodemanager registration: " + message); + response.setDiagnosticsMessage(message); + response.setNodeAction(NodeAction.SHUTDOWN); + return response; + } + } + // Check if this node is a 'valid' node if (!this.nodesListManager.isValidNode(host) && !isNodeInDecommissioning(nodeId)) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/9bb2801e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index e40b3c0..b451db1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -18,7 +18,10 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import org.apache.hadoop.net.ServerSocketUtil; import org.apache.hadoop.yarn.nodelabels.NodeAttributeStore; +import org.apache.hadoop.yarn.server.api.ResourceTracker; +import org.apache.hadoop.yarn.server.api.ServerRMProxy; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.FileSystemNodeAttributeStore; import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; @@ -2402,4 +2405,46 @@ public class TestResourceTrackerService extends NodeLabelTestBase { Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); Assert.assertEquals(1, nodeHeartbeat.getResponseId()); } + + @Test + public void testNMIpHostNameResolution() throws Exception { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, + "localhost:" + ServerSocketUtil.getPort(10000, 10)); + conf.setBoolean(YarnConfiguration.RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + true); + MockRM mockRM = new MockRM(conf) { + @Override + protected ResourceTrackerService createResourceTrackerService() { + return new ResourceTrackerService(getRMContext(), nodesListManager, + this.nmLivelinessMonitor, + rmContext.getContainerTokenSecretManager(), + rmContext.getNMTokenSecretManager()) { + }; + } + }; + mockRM.start(); + ResourceTracker rmTracker = + ServerRMProxy.createRMProxy(mockRM.getConfig(), ResourceTracker.class); + RegisterNodeManagerResponse response = rmTracker.registerNodeManager( + RegisterNodeManagerRequest.newInstance( + NodeId.newInstance("host1" + System.currentTimeMillis(), 1234), + 1236, Resource.newInstance(10000, 10), "2", new ArrayList<>(), + new ArrayList<>())); + Assert + .assertEquals("Shutdown signal should be received", NodeAction.SHUTDOWN, + response.getNodeAction()); + Assert.assertTrue("Diagnostic Message", response.getDiagnosticsMessage() + .contains("hostname cannot be resolved ")); + // Test success + rmTracker = + ServerRMProxy.createRMProxy(mockRM.getConfig(), ResourceTracker.class); + response = rmTracker.registerNodeManager(RegisterNodeManagerRequest + .newInstance(NodeId.newInstance("localhost", 1234), 1236, + Resource.newInstance(10000, 10), "2", new ArrayList<>(), + new ArrayList<>())); + Assert.assertEquals("Successfull registration", NodeAction.NORMAL, + response.getNodeAction()); + mockRM.stop(); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org