This is an automated email from the ASF dual-hosted git repository. jbrennan pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 07a4220 HDFS-15821. Add metrics for in-service datanodes (#2690). Contributed by Zehao Chen. 07a4220 is described below commit 07a4220cd27c69b86b837e8da320bad0031f7895 Author: Zehao Chen <zeha...@gmail.com> AuthorDate: Mon Feb 15 11:14:32 2021 -0600 HDFS-15821. Add metrics for in-service datanodes (#2690). Contributed by Zehao Chen. --- .../federation/metrics/NamenodeBeanMetrics.java | 5 ++ .../hadoop/hdfs/server/namenode/FSNamesystem.java | 13 +++ .../server/namenode/metrics/FSNamesystemMBean.java | 6 ++ .../hdfs/server/namenode/TestNameNodeMXBean.java | 97 ++++++++++++++++++++++ 4 files changed, 121 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java index 8596d9db..45ae81e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java @@ -740,6 +740,11 @@ public class NamenodeBeanMetrics } @Override + public int getNumInServiceLiveDataNodes() { + return 0; + } + + @Override public int getVolumeFailuresTotal() { return 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9c3dd25..d13e211 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -5580,6 +5580,19 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, } @Override // FSNamesystemMBean + @Metric({"NumInServiceLiveDataNodes", + "Number of live datanodes which are currently in service"}) + public int getNumInServiceLiveDataNodes() { + final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>(); + getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); + int liveInService = live.size(); + for (DatanodeDescriptor node : live) { + liveInService -= node.isInMaintenance() ? 1 : 0; + } + return liveInService; + } + + @Override // FSNamesystemMBean @Metric({"VolumeFailuresTotal", "Total number of volume failures across all Datanodes"}) public int getVolumeFailuresTotal() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java index 7dbddc2..7e5f108 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java @@ -151,6 +151,12 @@ public interface FSNamesystemMBean { public int getNumDecomDeadDataNodes(); /** + * @return Number of in-service data nodes, where NumInServiceDataNodes = + * NumLiveDataNodes - NumDecomLiveDataNodes - NumInMaintenanceLiveDataNodes + */ + int getNumInServiceLiveDataNodes(); + + /** * Number of failed data volumes across all live data nodes. * @return number of failed data volumes across all live data nodes */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java index 6180022..a309e32 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java @@ -435,6 +435,103 @@ public class TestNameNodeMXBean { } } + @Test(timeout = 120000) + public void testInServiceNodes() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, + 30); + conf.setClass(DFSConfigKeys.DFS_NAMENODE_HOSTS_PROVIDER_CLASSNAME_KEY, + CombinedHostFileManager.class, HostConfigManager.class); + MiniDFSCluster cluster = null; + HostsFileWriter hostsFileWriter = new HostsFileWriter(); + hostsFileWriter.initialize(conf, "temp/TestInServiceNodes"); + + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + cluster.waitActive(); + + final FSNamesystem fsn = cluster.getNameNode().namesystem; + final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + final ObjectName mxbeanName = new ObjectName( + "Hadoop:service=NameNode,name=FSNamesystem"); + + List<String> hosts = new ArrayList<>(); + for (DataNode dn : cluster.getDataNodes()) { + hosts.add(dn.getDisplayName()); + } + hostsFileWriter.initIncludeHosts(hosts.toArray( + new String[hosts.size()])); + fsn.getBlockManager().getDatanodeManager().refreshNodes(conf); + + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override + public Boolean get() { + try { + int numLiveDataNodes = (int) mbs.getAttribute(mxbeanName, + "NumLiveDataNodes"); + return numLiveDataNodes == 3; + } catch (Exception e) { + return false; + } + } + }, 1000, 60000); + + // Verify nodes + int numDecomLiveDataNodes = (int) mbs.getAttribute(mxbeanName, + "NumDecomLiveDataNodes"); + int numInMaintenanceLiveDataNodes = (int) mbs.getAttribute(mxbeanName, + "NumInMaintenanceLiveDataNodes"); + int numInServiceLiveDataNodes = (int) mbs.getAttribute(mxbeanName, + "NumInServiceLiveDataNodes"); + assertEquals(0, numDecomLiveDataNodes); + assertEquals(0, numInMaintenanceLiveDataNodes); + assertEquals(3, numInServiceLiveDataNodes); + + // Add 2 nodes to out-of-service list + ArrayList<String> decomNodes = new ArrayList<>(); + decomNodes.add(cluster.getDataNodes().get(0).getDisplayName()); + + Map<String, Long> maintenanceNodes = new HashMap<>(); + final int expirationInMs = 30 * 1000; + maintenanceNodes.put(cluster.getDataNodes().get(1).getDisplayName(), + Time.now() + expirationInMs); + + hostsFileWriter.initOutOfServiceHosts(decomNodes, maintenanceNodes); + fsn.getBlockManager().getDatanodeManager().refreshNodes(conf); + + // Wait for the DatanodeAdminManager to complete check + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override + public Boolean get() { + try { + int numLiveDataNodes = (int) mbs.getAttribute(mxbeanName, + "NumLiveDataNodes"); + int numDecomLiveDataNodes = (int) mbs.getAttribute(mxbeanName, + "NumDecomLiveDataNodes"); + int numInMaintenanceLiveDataNodes = (int) mbs.getAttribute( + mxbeanName, "NumInMaintenanceLiveDataNodes"); + return numLiveDataNodes == 3 && + numDecomLiveDataNodes == 1 && + numInMaintenanceLiveDataNodes == 1; + } catch (Exception e) { + return false; + } + } + }, 1000, 60000); + + // Verify nodes + numInServiceLiveDataNodes = (int) mbs.getAttribute(mxbeanName, + "NumInServiceLiveDataNodes"); + assertEquals(1, numInServiceLiveDataNodes); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + hostsFileWriter.cleanup(); + } + } + @Test (timeout = 120000) public void testMaintenanceNodes() throws Exception { LOG.info("Starting testMaintenanceNodes"); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org