This is an automated email from the ASF dual-hosted git repository.

tasanuma pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new a6aa2925fb64 HDFS-17333. DFSClient supports lazy resolution from 
hostname to IP. (#6430)
a6aa2925fb64 is described below

commit a6aa2925fb64362e6ec5ff1b4d769bdf7a2d0f50
Author: Jian Zhang <1361320...@qq.com>
AuthorDate: Sat Mar 2 20:35:24 2024 +0800

    HDFS-17333. DFSClient supports lazy resolution from hostname to IP. (#6430)
    
    Signed-off-by: Takanobu Asanuma <tasan...@apache.org>
---
 .../main/java/org/apache/hadoop/net/NetUtils.java  | 15 +++-
 .../java/org/apache/hadoop/hdfs/DFSUtilClient.java | 19 +++--
 .../hadoop/hdfs/client/HdfsClientConfigKeys.java   |  2 +
 .../ha/AbstractNNFailoverProxyProvider.java        | 26 +++++++
 .../ha/TestConfiguredFailoverProxyProvider.java    | 82 +++++++++++++++++++++-
 .../src/main/resources/hdfs-default.xml            |  9 +++
 .../java/org/apache/hadoop/hdfs/TestDFSUtil.java   | 47 +++++++++++++
 7 files changed, 193 insertions(+), 7 deletions(-)

diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
index c49706d66f27..a647bb041066 100644
--- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
+++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
@@ -163,6 +163,10 @@ public class NetUtils {
     return createSocketAddr(target, -1);
   }
 
+  public static InetSocketAddress createSocketAddrUnresolved(String target) {
+    return createSocketAddr(target, -1, null, false, false);
+  }
+
   /**
    * Util method to build socket addr from either.
    *   {@literal <host>}
@@ -219,6 +223,12 @@ public class NetUtils {
                                                    int defaultPort,
                                                    String configName,
                                                    boolean useCacheIfPresent) {
+    return createSocketAddr(target, defaultPort, configName, 
useCacheIfPresent, true);
+  }
+
+  public static InetSocketAddress createSocketAddr(
+      String target, int defaultPort, String configName,
+      boolean useCacheIfPresent, boolean isResolved) {
     String helpText = "";
     if (configName != null) {
       helpText = " (configuration property '" + configName + "')";
@@ -244,7 +254,10 @@ public class NetUtils {
           "Does not contain a valid host:port authority: " + target + helpText
       );
     }
-    return createSocketAddrForHost(host, port);
+    if (isResolved) {
+      return createSocketAddrForHost(host, port);
+    }
+    return InetSocketAddress.createUnresolved(host, port);
   }
 
   private static final long URI_CACHE_SIZE_DEFAULT = 1000;
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
index b2fc472aad83..ea001fa2d3a8 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
@@ -107,6 +107,8 @@ import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_HA_NAMENODE
 import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMENODE_RPC_ADDRESS_AUXILIARY_KEY;
 import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY;
 import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMESERVICES;
+import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Failover.DFS_CLIENT_LAZY_RESOLVED;
+import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Failover.DFS_CLIENT_LAZY_RESOLVED_DEFAULT;
 
 @InterfaceAudience.Private
 public class DFSUtilClient {
@@ -530,11 +532,18 @@ public class DFSUtilClient {
       String suffix = concatSuffixes(nsId, nnId);
       String address = checkKeysAndProcess(defaultValue, suffix, conf, keys);
       if (address != null) {
-        InetSocketAddress isa = NetUtils.createSocketAddr(address);
-        if (isa.isUnresolved()) {
-          LOG.warn("Namenode for {} remains unresolved for ID {}. Check your "
-              + "hdfs-site.xml file to ensure namenodes are configured "
-              + "properly.", nsId, nnId);
+        InetSocketAddress isa = null;
+        // There is no need to resolve host->ip in advance.
+        // Delay the resolution until the host is used.
+        if (conf.getBoolean(DFS_CLIENT_LAZY_RESOLVED, 
DFS_CLIENT_LAZY_RESOLVED_DEFAULT)) {
+          isa = NetUtils.createSocketAddrUnresolved(address);
+        }else {
+          isa = NetUtils.createSocketAddr(address);
+          if (isa.isUnresolved()) {
+            LOG.warn("Namenode for {} remains unresolved for ID {}. Check your 
"
+                + "hdfs-site.xml file to ensure namenodes are configured "
+                + "properly.", nsId, nnId);
+          }
         }
         ret.put(nnId, isa);
       }
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
index efaa5601ad81..204453050675 100755
--- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
@@ -397,6 +397,8 @@ public interface HdfsClientConfigKeys {
     String RESOLVE_SERVICE_KEY = PREFIX + "resolver.impl";
     String  RESOLVE_ADDRESS_TO_FQDN = PREFIX + "resolver.useFQDN";
     boolean RESOLVE_ADDRESS_TO_FQDN_DEFAULT = true;
+    String DFS_CLIENT_LAZY_RESOLVED = PREFIX + "lazy.resolved";
+    boolean DFS_CLIENT_LAZY_RESOLVED_DEFAULT = false;
   }
 
   /** dfs.client.write configuration properties */
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
index ec4c22ecb5c1..f052eae3e0e9 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
@@ -37,10 +37,14 @@ import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.net.DomainNameResolver;
 import org.apache.hadoop.net.DomainNameResolverFactory;
+import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Failover.DFS_CLIENT_LAZY_RESOLVED;
+import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.Failover.DFS_CLIENT_LAZY_RESOLVED_DEFAULT;
+
 public abstract class AbstractNNFailoverProxyProvider<T> implements
     FailoverProxyProvider <T> {
   protected static final Logger LOG =
@@ -138,6 +142,10 @@ public abstract class AbstractNNFailoverProxyProvider<T> 
implements
     public HAServiceState getCachedState() {
       return cachedState;
     }
+
+    public void setAddress(InetSocketAddress address) {
+      this.address = address;
+    }
   }
 
   @Override
@@ -152,6 +160,24 @@ public abstract class AbstractNNFailoverProxyProvider<T> 
implements
     if (pi.proxy == null) {
       assert pi.getAddress() != null : "Proxy address is null";
       try {
+        InetSocketAddress address = pi.getAddress();
+        // If the host is not resolved to IP and lazy.resolved=true,
+        // the host needs to be resolved.
+        if (address.isUnresolved()) {
+          if (conf.getBoolean(DFS_CLIENT_LAZY_RESOLVED, 
DFS_CLIENT_LAZY_RESOLVED_DEFAULT)) {
+            InetSocketAddress isa =
+                NetUtils.createSocketAddrForHost(address.getHostName(), 
address.getPort());
+            if (isa.isUnresolved()) {
+              LOG.warn("Can not resolve host {}, check your hdfs-site.xml file 
" +
+                  "to ensure host are configured correctly.", 
address.getHostName());
+            }
+            pi.setAddress(isa);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Lazy resolve host {} -> {}, when create proxy if 
needed.",
+                  address.toString(), pi.getAddress().toString());
+            }
+          }
+        }
         pi.proxy = factory.createProxy(conf,
             pi.getAddress(), xface, ugi, false, getFallbackToSimpleAuth());
       } catch (IOException ioe) {
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java
 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java
index c198536d01a2..a04e779e8004 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestConfiguredFailoverProxyProvider.java
@@ -44,6 +44,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Mockito.mock;
@@ -60,6 +61,7 @@ public class TestConfiguredFailoverProxyProvider {
   private URI ns1Uri;
   private URI ns2Uri;
   private URI ns3Uri;
+  private URI ns4Uri;
   private String ns1;
   private String ns1nn1Hostname = "machine1.foo.bar";
   private InetSocketAddress ns1nn1 =
@@ -79,6 +81,9 @@ public class TestConfiguredFailoverProxyProvider {
       new InetSocketAddress(ns2nn3Hostname, rpcPort);
   private String ns3;
   private static final int NUM_ITERATIONS = 50;
+  private String ns4;
+  private String ns4nn1Hostname = "localhost";
+  private String ns4nn2Hostname = "127.0.0.1";
 
   @Rule
   public final ExpectedException exception = ExpectedException.none();
@@ -133,8 +138,11 @@ public class TestConfiguredFailoverProxyProvider {
     ns3 = "mycluster-3-" + Time.monotonicNow();
     ns3Uri = new URI("hdfs://" + ns3);
 
+    ns4 = "mycluster-4-" + Time.monotonicNow();
+    ns4Uri = new URI("hdfs://" + ns4);
+
     conf.set(HdfsClientConfigKeys.DFS_NAMESERVICES,
-        String.join(",", ns1, ns2, ns3));
+        String.join(",", ns1, ns2, ns3, ns4));
     conf.set("fs.defaultFS", "hdfs://" + ns1);
   }
 
@@ -170,6 +178,33 @@ public class TestConfiguredFailoverProxyProvider {
     );
   }
 
+  /**
+   * Add more LazyResolved related settings to the passed in configuration.
+   */
+  private void addLazyResolvedSettings(Configuration config, boolean isLazy) {
+    config.set(
+        HdfsClientConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX + "." + ns4,
+        "nn1,nn2,nn3");
+    config.set(
+        HdfsClientConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY + "." + ns4 + ".nn1",
+        ns4nn1Hostname + ":" + rpcPort);
+    config.set(
+        HdfsClientConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY + "." + ns4 + ".nn2",
+        ns4nn2Hostname + ":" + rpcPort);
+    config.set(
+        HdfsClientConfigKeys.Failover.PROXY_PROVIDER_KEY_PREFIX + "." + ns4,
+        ConfiguredFailoverProxyProvider.class.getName());
+    if (isLazy) {
+      // Set  dfs.client.failover.lazy.resolved=true (default false).
+      config.setBoolean(
+          HdfsClientConfigKeys.Failover.DFS_CLIENT_LAZY_RESOLVED,
+          true);
+    }
+    config.setBoolean(
+        HdfsClientConfigKeys.Failover.RANDOM_ORDER + "." + ns4,
+        false);
+  }
+
   /**
    * Tests getProxy with random.order configuration set to false.
    * This expects the proxy order to be consistent every time a new
@@ -330,6 +365,51 @@ public class TestConfiguredFailoverProxyProvider {
     testResolveDomainNameUsingDNS(true);
   }
 
+  @Test
+  public void testLazyResolved() throws IOException {
+    // Not lazy resolved.
+    testLazyResolved(false);
+    // Lazy resolved.
+    testLazyResolved(true);
+  }
+
+  private void testLazyResolved(boolean isLazy) throws IOException {
+    Configuration lazyResolvedConf = new Configuration(conf);
+    addLazyResolvedSettings(lazyResolvedConf, isLazy);
+    Map<InetSocketAddress, ClientProtocol> proxyMap = new HashMap<>();
+
+    InetSocketAddress ns4nn1 = new InetSocketAddress(ns4nn1Hostname, rpcPort);
+    InetSocketAddress ns4nn2 = new InetSocketAddress(ns4nn2Hostname, rpcPort);
+
+    // Mock ClientProtocol
+    final ClientProtocol nn1Mock = mock(ClientProtocol.class);
+    when(nn1Mock.getStats()).thenReturn(new long[]{0});
+    proxyMap.put(ns4nn1, nn1Mock);
+
+    final ClientProtocol nn2Mock = mock(ClientProtocol.class);
+    when(nn1Mock.getStats()).thenReturn(new long[]{0});
+    proxyMap.put(ns4nn2, nn2Mock);
+
+    ConfiguredFailoverProxyProvider<ClientProtocol> provider =
+        new ConfiguredFailoverProxyProvider<>(lazyResolvedConf, ns4Uri,
+            ClientProtocol.class, createFactory(proxyMap));
+    assertEquals(2, provider.proxies.size());
+    for (AbstractNNFailoverProxyProvider.NNProxyInfo proxyInfo : 
provider.proxies) {
+      if (isLazy) {
+        // If lazy resolution is enabled and the proxy has not been used yet,
+        // the host is not resolved.
+        assertTrue(proxyInfo.getAddress().isUnresolved());
+      }else {
+        assertFalse(proxyInfo.getAddress().isUnresolved());
+      }
+    }
+
+    // When the host is used to process the request, the host is resolved.
+    ClientProtocol proxy = provider.getProxy().proxy;
+    proxy.getStats();
+    assertFalse(provider.proxies.get(0).getAddress().isUnresolved());
+  }
+
   @Test
   public void testResolveDomainNameUsingDNSUnknownHost() throws Exception {
     Configuration dnsConf = new Configuration(conf);
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 174f7242bfbc..e6dc8c5ba1ac 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4469,6 +4469,15 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.client.failover.lazy.resolved</name>
+  <value>false</value>
+  <description>
+    Used to enable lazy resolution of host->ip. If the value is true,
+    the host will be resolved only when the DFSClient needs to send a request
+    to that host.
+  </description>
+</property>
+
 <property>
   <name>dfs.client.key.provider.cache.expiry</name>
   <value>864000000</value>
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
index 4bdb405e4da0..3837a6aab43b 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
@@ -1159,4 +1159,51 @@ public class TestDFSUtil {
     assertEquals(102_400_000,
         DFSUtil.getTransferRateInBytesPerSecond(512_000_000L, 5_000_000_000L));
   }
+
+  @Test
+  public void testLazyResolved() {
+    // Not lazy resolved.
+    testLazyResolved(false);
+    // Lazy resolved.
+    testLazyResolved(true);
+  }
+
+  private void testLazyResolved(boolean isLazy) {
+    final String ns1Nn1 = "localhost:8020";
+    final String ns1Nn2 = "127.0.0.1:8020";
+    final String ns2Nn1 = "127.0.0.2:8020";
+    final String ns2Nn2 = "127.0.0.3:8020";
+
+    HdfsConfiguration conf = new HdfsConfiguration();
+
+    conf.set(DFS_NAMESERVICES, "ns1,ns2");
+    conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"), 
"nn1,nn2");
+    conf.set(DFSUtil.addKeySuffixes(
+        DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"), ns1Nn1);
+    conf.set(DFSUtil.addKeySuffixes(
+        DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"), ns1Nn2);
+    conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns2"), 
"nn1,nn2");
+    conf.set(DFSUtil.addKeySuffixes(
+        DFS_NAMENODE_RPC_ADDRESS_KEY, "ns2", "nn1"), ns2Nn1);
+    conf.set(DFSUtil.addKeySuffixes(
+        DFS_NAMENODE_RPC_ADDRESS_KEY, "ns2", "nn2"), ns2Nn2);
+
+    conf.setBoolean(HdfsClientConfigKeys.Failover.DFS_CLIENT_LAZY_RESOLVED, 
isLazy);
+
+    Map<String, Map<String, InetSocketAddress>> addresses =
+        DFSUtilClient.getAddresses(conf, null, DFS_NAMENODE_RPC_ADDRESS_KEY);
+
+    addresses.forEach((ns, inetSocketAddressMap) -> {
+      inetSocketAddressMap.forEach((nn, inetSocketAddress) -> {
+        if (isLazy) {
+          // Lazy resolved. There is no need to change host->ip in advance.
+          assertTrue(inetSocketAddress.isUnresolved());
+        }else {
+          // Need resolve all host->ip.
+          assertFalse(inetSocketAddress.isUnresolved());
+        }
+        assertEquals(inetSocketAddress.getPort(), 8020);
+      });
+    });
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to